In [1]:
from io import StringIO
import requests
import seaborn
import json
import pandas as pd
import re
from collections import defaultdict
import timeit
import matplotlib.pyplot
import numpy
%matplotlib inline
In [2]:
df_data_2 = pd.read_csv('h1b_kaggle.csv')
In [3]:
df_data_2.head(10)
Out[3]:
Unnamed: 0
CASE_STATUS
EMPLOYER_NAME
SOC_NAME
JOB_TITLE
FULL_TIME_POSITION
PREVAILING_WAGE
YEAR
WORKSITE
lon
lat
0
1
CERTIFIED-WITHDRAWN
UNIVERSITY OF MICHIGAN
BIOCHEMISTS AND BIOPHYSICISTS
POSTDOCTORAL RESEARCH FELLOW
N
36067.0
2016.0
ANN ARBOR, MICHIGAN
-83.743038
42.280826
1
2
CERTIFIED-WITHDRAWN
GOODMAN NETWORKS, INC.
CHIEF EXECUTIVES
CHIEF OPERATING OFFICER
Y
242674.0
2016.0
PLANO, TEXAS
-96.698886
33.019843
2
3
CERTIFIED-WITHDRAWN
PORTS AMERICA GROUP, INC.
CHIEF EXECUTIVES
CHIEF PROCESS OFFICER
Y
193066.0
2016.0
JERSEY CITY, NEW JERSEY
-74.077642
40.728158
3
4
CERTIFIED-WITHDRAWN
GATES CORPORATION, A WHOLLY-OWNED SUBSIDIARY O...
CHIEF EXECUTIVES
REGIONAL PRESIDEN, AMERICAS
Y
220314.0
2016.0
DENVER, COLORADO
-104.990251
39.739236
4
5
WITHDRAWN
PEABODY INVESTMENTS CORP.
CHIEF EXECUTIVES
PRESIDENT MONGOLIA AND INDIA
Y
157518.4
2016.0
ST. LOUIS, MISSOURI
-90.199404
38.627003
5
6
CERTIFIED-WITHDRAWN
BURGER KING CORPORATION
CHIEF EXECUTIVES
EXECUTIVE V P, GLOBAL DEVELOPMENT AND PRESIDEN...
Y
225000.0
2016.0
MIAMI, FLORIDA
-80.191790
25.761680
6
7
CERTIFIED-WITHDRAWN
BT AND MK ENERGY AND COMMODITIES
CHIEF EXECUTIVES
CHIEF OPERATING OFFICER
Y
91021.0
2016.0
HOUSTON, TEXAS
-95.369803
29.760427
7
8
CERTIFIED-WITHDRAWN
GLOBO MOBILE TECHNOLOGIES, INC.
CHIEF EXECUTIVES
CHIEF OPERATIONS OFFICER
Y
150000.0
2016.0
SAN JOSE, CALIFORNIA
-121.886329
37.338208
8
9
CERTIFIED-WITHDRAWN
ESI COMPANIES INC.
CHIEF EXECUTIVES
PRESIDENT
Y
127546.0
2016.0
MEMPHIS, TEXAS
NaN
NaN
9
10
WITHDRAWN
LESSARD INTERNATIONAL LLC
CHIEF EXECUTIVES
PRESIDENT
Y
154648.0
2016.0
VIENNA, VIRGINIA
-77.265260
38.901222
In [4]:
df_data_2[['CASE_STATUS', 'SOC_NAME']]
#use this to make a chart with two indexes
Out[4]:
CASE_STATUS
SOC_NAME
0
CERTIFIED-WITHDRAWN
BIOCHEMISTS AND BIOPHYSICISTS
1
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
2
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
3
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
4
WITHDRAWN
CHIEF EXECUTIVES
5
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
6
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
7
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
8
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
9
WITHDRAWN
CHIEF EXECUTIVES
10
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
11
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
12
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
13
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
14
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
15
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
16
CERTIFIED-WITHDRAWN
FINANCIAL MANAGERS
17
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
18
CERTIFIED
CHIEF EXECUTIVES
19
CERTIFIED
CHIEF EXECUTIVES
20
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
21
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
22
CERTIFIED
CHIEF EXECUTIVES
23
CERTIFIED
CHIEF EXECUTIVES
24
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
25
CERTIFIED
CHIEF EXECUTIVES
26
CERTIFIED-WITHDRAWN
CHIEF EXECUTIVES
27
CERTIFIED
CHIEF EXECUTIVES
28
CERTIFIED
CHIEF EXECUTIVES
29
CERTIFIED
CHIEF EXECUTIVES
...
...
...
3002428
WITHDRAWN
Physical Therapists
3002429
WITHDRAWN
Physical Therapists
3002430
WITHDRAWN
Computer Systems Analysts
3002431
WITHDRAWN
Computer Support Specialists
3002432
WITHDRAWN
Computer Systems Analysts
3002433
WITHDRAWN
Biochemists and Biophysicists
3002434
WITHDRAWN
Computer Systems Analysts
3002435
WITHDRAWN
Computer Programmers
3002436
WITHDRAWN
Computer Programmers
3002437
WITHDRAWN
Health Specialties Teachers, Postsecondary
3002438
WITHDRAWN
Database Administrators
3002439
WITHDRAWN
Biological Scientists, All Other
3002440
WITHDRAWN
Software Developers, Applications
3002441
WITHDRAWN
Materials Scientists
3002442
WITHDRAWN
Market Research Analysts and Marketing Special...
3002443
WITHDRAWN
Commercial and Industrial Designers
3002444
WITHDRAWN
Commercial and Industrial Designers
3002445
NaN
NaN
3002446
NaN
NaN
3002447
NaN
NaN
3002448
NaN
NaN
3002449
NaN
NaN
3002450
NaN
NaN
3002451
NaN
NaN
3002452
NaN
NaN
3002453
NaN
NaN
3002454
NaN
NaN
3002455
NaN
NaN
3002456
NaN
NaN
3002457
NaN
NaN
3002458 rows × 2 columns
In [5]:
df_data_2['SOC_NAME'].value_counts()
Out[5]:
Computer Systems Analysts 291170
Computer Programmers 226574
SOFTWARE DEVELOPERS, APPLICATIONS 221783
COMPUTER SYSTEMS ANALYSTS 215353
Software Developers, Applications 192933
COMPUTER PROGRAMMERS 171972
COMPUTER OCCUPATIONS, ALL OTHER 108555
Computer Occupations, All Other 69315
Software Developers, Systems Software 44500
SOFTWARE DEVELOPERS, SYSTEMS SOFTWARE 39356
Management Analysts 38756
Financial Analysts 34141
Computer Software Engineers, Applications 33325
Accountants and Auditors 30708
MANAGEMENT ANALYSTS 28117
Mechanical Engineers 26219
Computer Occupations, All Other* 25628
Electronics Engineers, Except Computer 23323
Electrical Engineers 22162
Market Research Analysts and Marketing Specialists 22158
ACCOUNTANTS AND AUDITORS 22114
Physicians and Surgeons, All Other 22010
Database Administrators 21937
NETWORK AND COMPUTER SYSTEMS ADMINISTRATORS 21642
FINANCIAL ANALYSTS 19399
Operations Research Analysts 19171
Network and Computer Systems Administrators 18445
MECHANICAL ENGINEERS 18017
COMPUTER SYSTEMS ANALYST 17426
DATABASE ADMINISTRATORS 17045
...
OCCUPATIONAL THERAPY ASSISTANTS 1
Service Unit Operators, Oil, Gas, and Mining 1
Dietitians and Nutritionists, R&D (ACWIA Only) 1
MEDICAL TECHNOLOGISTS 1
SYSTEMS ENGINEERS/ARCHITECTS 1
ENGLISH LANGUAGE & LIT TEACHERS, POST SECONDARY 1
SECURITIES, AND COMMODITIES TRADERS 1
FOREIGN LANGUAGE LITERATURE TEACHERS, POSTSECOND 1
FOREIGN LANGUAGE AND LITERATURE TEACHERS, POST SEC 1
SOFTWARE DEVELOPERS, APPLICATION 1
SECURITIES COMMODITIES FINANCIAL SERV SALES AGENTS 1
MEDICAL SCIENTISTS EXCEPT EPIDIMIOLOGISTS 1
HYDROLOGISITS 1
Earth Drillers, Except Oil and Gas 1
Hazardous Materials Removal Workers 1
ELECTRONICS ENGINEERS; EXCEPT COMPUTER 1
FOREIGN LANGUAGE AND LITERATURE TEACHERS, POSTSECONDARY 1
Funeral Attendants 1
TUTORS 1
27-3031 1
MARKET RESEARCH ANALYSTS AND MARKETING SPECIALIST 1
HEALTH SPECIALITY TEACHERS, POSTSECONDARY 1
ART, DRAMA, AND MUSIC TEACHER, POSTSECONDARY 1
ENVIRONMENTAL SCIENTIST 1
TRAINING AND DEVELOPMENT MANAGER 1
SALES AND RELATED WORKERS, ALL OTHER* 1
MEDICAL SCIENTIST, EXCEPT EPIDEMIOLOGIST 1
COMPUTER PROGRAMMGER 1
OPERATIONS SYSTEMS ANALYST 1
BUSINESS OPERATIONS SPECIALIST, ALL OTHERS 1
Name: SOC_NAME, dtype: int64
In [8]:
df_data_2['SOC_NAME'].value_counts().count()
Out[8]:
2132
In [9]:
cleandata1=df_data_2.copy()
In [10]:
cleandata1['SOC_NAME']=df_data_2['SOC_NAME'].str.lower()
cleandata1['EMPLOYER_NAME']=df_data_2['EMPLOYER_NAME'].str.lower()
cleandata1['JOB_TITLE']=df_data_2['JOB_TITLE'].str.lower()
In [13]:
cleandata1.to_csv('h1b_kaggle_lowwered.csv', sep='\t')
Here, we transform some strings to lowercase. This is because there are duplicate entries in the dataset which in both upper and lower. This increases redundancy
In [9]:
cleandata1['SOC_NAME'].value_counts()
Out[9]:
computer systems analysts 506523
software developers, applications 414716
computer programmers 398546
computer occupations, all other 177870
software developers, systems software 83856
management analysts 66873
financial analysts 53540
accountants and auditors 52822
mechanical engineers 44236
network and computer systems administrators 40087
database administrators 38982
market research analysts and marketing specialists 37737
electronics engineers, except computer 36574
operations research analysts 34260
electrical engineers 34108
physicians and surgeons, all other 33526
computer software engineers, applications 33387
computer and information systems managers 27536
computer occupations, all other* 26254
medical scientists, except epidemiologists 26159
physical therapists 21994
biochemists and biophysicists 21245
industrial engineers 19370
computer systems analyst 17426
statisticians 17101
biological scientists, all other 16367
marketing managers 16310
civil engineers 15970
web developers 15000
internists, general 13367
...
urologists 1
financial analysis 1
commerical and industrial designers 1
17-2051 1
training and development manager 1
job printers 1
occupational health and safety specialists and tec 1
finance managers 1
electonics engineers, except computer 1
industrial designers 1
software developers, applications, non r&d 1
property real estate & community association mgrs 1
business systems analysts 1
secondary school teachers, except special and vocational education 1
chemist 1
health speciality teacher 1
pediatrician 1
computer systems engineers/arquitects 1
medical and clincial laboratory technologists 1
computer systems engineers/architect 1
lodging manager 1
new accounts clerks 1
designer, all other 1
atmospheric, earth, marine, & space sciences teach 1
network & computer systems administrator 1
foreign language and literature teachers, post sec 1
computer occuptations, all other 1
engineering teachers postsecondary 1
cashiers 1
business intelligence anaylsts 1
Name: SOC_NAME, dtype: int64
There is still alot of redundancy we can exploit. We can generalize these strings remove specalized strings into more general form. For example 'software engineer, senior' needs to be reduced to 'software engineer'. This also applies to the other columns with string attributes.
In [10]:
cleandata1['SOC_NAME'].value_counts().count()
Out[10]:
1585
We have reduced the number of names down to 1585
In [11]:
reducedf = pd.DataFrame({'SOC_NAME': cleandata1['SOC_NAME'].value_counts().index, 'Count':cleandata1['SOC_NAME'].value_counts().values})
#df['Counts'] = df.groupby(['SOC_NAME'])['Count'].transform('count') #I don't remember what I was trying to do here.
#df = df.set_index(['SOC_NAME'])
reducedf
Out[11]:
Count
SOC_NAME
0
506523
computer systems analysts
1
414716
software developers, applications
2
398546
computer programmers
3
177870
computer occupations, all other
4
83856
software developers, systems software
5
66873
management analysts
6
53540
financial analysts
7
52822
accountants and auditors
8
44236
mechanical engineers
9
40087
network and computer systems administrators
10
38982
database administrators
11
37737
market research analysts and marketing special...
12
36574
electronics engineers, except computer
13
34260
operations research analysts
14
34108
electrical engineers
15
33526
physicians and surgeons, all other
16
33387
computer software engineers, applications
17
27536
computer and information systems managers
18
26254
computer occupations, all other*
19
26159
medical scientists, except epidemiologists
20
21994
physical therapists
21
21245
biochemists and biophysicists
22
19370
industrial engineers
23
17426
computer systems analyst
24
17101
statisticians
25
16367
biological scientists, all other
26
16310
marketing managers
27
15970
civil engineers
28
15000
web developers
29
13367
internists, general
...
...
...
1555
1
urologists
1556
1
financial analysis
1557
1
commerical and industrial designers
1558
1
17-2051
1559
1
training and development manager
1560
1
job printers
1561
1
occupational health and safety specialists and...
1562
1
finance managers
1563
1
electonics engineers, except computer
1564
1
industrial designers
1565
1
software developers, applications, non r&d
1566
1
property real estate & community association mgrs
1567
1
business systems analysts
1568
1
secondary school teachers, except special and ...
1569
1
chemist
1570
1
health speciality teacher
1571
1
pediatrician
1572
1
computer systems engineers/arquitects
1573
1
medical and clincial laboratory technologists
1574
1
computer systems engineers/architect
1575
1
lodging manager
1576
1
new accounts clerks
1577
1
designer, all other
1578
1
atmospheric, earth, marine, & space sciences t...
1579
1
network & computer systems administrator
1580
1
foreign language and literature teachers, post...
1581
1
computer occuptations, all other
1582
1
engineering teachers postsecondary
1583
1
cashiers
1584
1
business intelligence anaylsts
1585 rows × 2 columns
In [12]:
reducedf['Name1'] = ''
In [13]:
reducedf
Out[13]:
Count
SOC_NAME
Name1
0
506523
computer systems analysts
1
414716
software developers, applications
2
398546
computer programmers
3
177870
computer occupations, all other
4
83856
software developers, systems software
5
66873
management analysts
6
53540
financial analysts
7
52822
accountants and auditors
8
44236
mechanical engineers
9
40087
network and computer systems administrators
10
38982
database administrators
11
37737
market research analysts and marketing special...
12
36574
electronics engineers, except computer
13
34260
operations research analysts
14
34108
electrical engineers
15
33526
physicians and surgeons, all other
16
33387
computer software engineers, applications
17
27536
computer and information systems managers
18
26254
computer occupations, all other*
19
26159
medical scientists, except epidemiologists
20
21994
physical therapists
21
21245
biochemists and biophysicists
22
19370
industrial engineers
23
17426
computer systems analyst
24
17101
statisticians
25
16367
biological scientists, all other
26
16310
marketing managers
27
15970
civil engineers
28
15000
web developers
29
13367
internists, general
...
...
...
...
1555
1
urologists
1556
1
financial analysis
1557
1
commerical and industrial designers
1558
1
17-2051
1559
1
training and development manager
1560
1
job printers
1561
1
occupational health and safety specialists and...
1562
1
finance managers
1563
1
electonics engineers, except computer
1564
1
industrial designers
1565
1
software developers, applications, non r&d
1566
1
property real estate & community association mgrs
1567
1
business systems analysts
1568
1
secondary school teachers, except special and ...
1569
1
chemist
1570
1
health speciality teacher
1571
1
pediatrician
1572
1
computer systems engineers/arquitects
1573
1
medical and clincial laboratory technologists
1574
1
computer systems engineers/architect
1575
1
lodging manager
1576
1
new accounts clerks
1577
1
designer, all other
1578
1
atmospheric, earth, marine, & space sciences t...
1579
1
network & computer systems administrator
1580
1
foreign language and literature teachers, post...
1581
1
computer occuptations, all other
1582
1
engineering teachers postsecondary
1583
1
cashiers
1584
1
business intelligence anaylsts
1585 rows × 3 columns
In [14]:
reducedf.iloc[3]['Count'] #example of accessing a location
Out[14]:
177870
In [15]:
%%timeit
for index, row in reducedf.iterrows():
names = row['SOC_NAME'].split(",")
if(names[0].endswith('*')):
reducedf.set_value([index],['Name1'],(names[0][:-1]))
if not (names[0].endswith('s')):
reducedf.set_value([index],['Name1'],(names[0]+'s'))
else:
reducedf.set_value([index],['Name1'],names[0])
1 loop, best of 3: 919 ms per loop
In [16]:
reducedf
Out[16]:
Count
SOC_NAME
Name1
0
506523
computer systems analysts
computer systems analysts
1
414716
software developers, applications
software developers
2
398546
computer programmers
computer programmers
3
177870
computer occupations, all other
computer occupations
4
83856
software developers, systems software
software developers
5
66873
management analysts
management analysts
6
53540
financial analysts
financial analysts
7
52822
accountants and auditors
accountants and auditors
8
44236
mechanical engineers
mechanical engineers
9
40087
network and computer systems administrators
network and computer systems administrators
10
38982
database administrators
database administrators
11
37737
market research analysts and marketing special...
market research analysts and marketing special...
12
36574
electronics engineers, except computer
electronics engineers
13
34260
operations research analysts
operations research analysts
14
34108
electrical engineers
electrical engineers
15
33526
physicians and surgeons, all other
physicians and surgeons
16
33387
computer software engineers, applications
computer software engineers
17
27536
computer and information systems managers
computer and information systems managers
18
26254
computer occupations, all other*
computer occupations
19
26159
medical scientists, except epidemiologists
medical scientists
20
21994
physical therapists
physical therapists
21
21245
biochemists and biophysicists
biochemists and biophysicists
22
19370
industrial engineers
industrial engineers
23
17426
computer systems analyst
computer systems analysts
24
17101
statisticians
statisticians
25
16367
biological scientists, all other
biological scientists
26
16310
marketing managers
marketing managers
27
15970
civil engineers
civil engineers
28
15000
web developers
web developers
29
13367
internists, general
internists
...
...
...
...
1555
1
urologists
urologists
1556
1
financial analysis
financial analysis
1557
1
commerical and industrial designers
commerical and industrial designers
1558
1
17-2051
17-2051s
1559
1
training and development manager
training and development managers
1560
1
job printers
job printers
1561
1
occupational health and safety specialists and...
occupational health and safety specialists and...
1562
1
finance managers
finance managers
1563
1
electonics engineers, except computer
electonics engineers
1564
1
industrial designers
industrial designers
1565
1
software developers, applications, non r&d
software developers
1566
1
property real estate & community association mgrs
property real estate & community association mgrs
1567
1
business systems analysts
business systems analysts
1568
1
secondary school teachers, except special and ...
secondary school teachers
1569
1
chemist
chemists
1570
1
health speciality teacher
health speciality teachers
1571
1
pediatrician
pediatricians
1572
1
computer systems engineers/arquitects
computer systems engineers/arquitects
1573
1
medical and clincial laboratory technologists
medical and clincial laboratory technologists
1574
1
computer systems engineers/architect
computer systems engineers/architects
1575
1
lodging manager
lodging managers
1576
1
new accounts clerks
new accounts clerks
1577
1
designer, all other
designers
1578
1
atmospheric, earth, marine, & space sciences t...
atmospherics
1579
1
network & computer systems administrator
network & computer systems administrators
1580
1
foreign language and literature teachers, post...
foreign language and literature teachers
1581
1
computer occuptations, all other
computer occuptations
1582
1
engineering teachers postsecondary
engineering teachers postsecondarys
1583
1
cashiers
cashiers
1584
1
business intelligence anaylsts
business intelligence anaylsts
1585 rows × 3 columns
In [17]:
cleandata1['SOC_NAME'].value_counts().count()
Out[17]:
1585
In [18]:
(cleandata1.loc[(cleandata1['SOC_NAME']=='software developers, appllications')]) #an example of a query
Out[18]:
Unnamed: 0
CASE_STATUS
EMPLOYER_NAME
SOC_NAME
JOB_TITLE
FULL_TIME_POSITION
PREVAILING_WAGE
YEAR
WORKSITE
lon
lat
894328
894329
CERTIFIED
meridiansoft, inc.
software developers, appllications
software developer applications
Y
62421.0
2015.0
LEWIS CENTER, OHIO
-83.010099
40.198388
This person messed up the SOC_NAME
In [19]:
reducedf['Name1'].value_counts()
Out[19]:
software developers 37
securities 14
secondary school teachers 14
electronics engineers 13
sales representatives 13
computer occupations 11
computer software engineers 10
elementary school teachers 10
computer systems analysts 10
mechanical engineers 9
special education teachers 8
education administrators 7
medical scientists 7
computer programmers 7
middle school teachers 7
electrical engineers 7
civil engineers 6
cooks 6
foreign language and literature teachers 6
computer network architects 5
dentists 5
computer hardware engineers 5
atmospherics 5
meetings 4
purchasing agents 4
biological scientists 4
healthcare practitioners and technical workers 4
educationals 4
internists 4
engineers 4
..
farm and home management advisors 1
elementary teachers 1
orthotists and prosthetists 1
first-line supervisors of retail sales workers 1
softwware developers 1
network and systems administrators 1
it project managers 1
interpreters and translators 1
agricultural and food scientists 1
psychologists 1
foreign language/lit. teachers 1
emergency medical technicians and paramedics 1
public relations and fundraising managers 1
cardiovascular technologists and technicians 1
welders 1
occupational therapy assistants 1
nursing instructors and teachers 1
floral designers 1
residential advisors 1
machine feeders and offbearers 1
family practice physicians 1
solutions architects 1
application programmers 1
mental health and substance abuse social workers 1
psychology teachers 1
supply chain managers 1
title examiners 1
painters and illustrators 1
jewelers 1
financial advisor/accountants 1
Name: Name1, dtype: int64
In [20]:
reducedf['Name1'].value_counts().count()
Out[20]:
1134
We have now cut the number of names in half from the original number.
In [21]:
reducedf.sort_values(['Name1'])
Out[21]:
Count
SOC_NAME
Name1
1370
1
13-2011.01
13-2011.01s
1109
2
15-1121
15-1121s
1189
1
15-1132
15-1132s
1326
1
15-1199.01 sw quality assurance engnrs & testers
15-1199.01 sw quality assurance engnrs & testers
1429
1
15-1199.01
15-1199.01s
1337
1
15-1199.08, business intelligence analysts
15-1199.08s
1558
1
17-2051
17-2051s
1472
1
17-2072
17-2072s
1340
1
27-3031
27-3031s
1509
1
29-1064.00-obstetricians and gynecologists
29-1064.00-obstetricians and gynecologists
1426
1
<font><font>carpinteros</font></font>
<font><font>carpinteros</font></font>s
1055
2
able seamen
able seamens
74
3958
accountants
accountants
1046
2
accountant
accountants
7
52822
accountants and auditors
accountants and auditors
565
26
actors
actors
80
3486
actuaries
actuaries
1461
1
acupuncturists
acupuncturists
893
4
adhesive bonding machine operators and tenders
adhesive bonding machine operators and tenders
823
6
administrative law judges, adjudicators, and h...
administrative law judges
152
1361
administrative services managers
administrative services managers
323
230
adult basic and secondary education and litera...
adult basic and secondary education and litera...
395
116
adult basic and secondary education and literacy
adult basic and secondary education and literacys
1057
2
adult basic second educ and lit teach and instruc
adult basic second educ and lit teach and inst...
485
50
adult literacy, remedial education, and ged te...
adult literacys
1484
1
advertising and promortions managers
advertising and promortions managers
94
2925
advertising and promotions managers
advertising and promotions managers
1164
2
advertising and promotions manager
advertising and promotions managers
342
195
advertising sales agents
advertising sales agents
351
179
aerospace engineering and operations technicians
aerospace engineering and operations technicians
...
...
...
...
1555
1
urologists
urologists
1113
2
ushers, lobby attendants, and ticket takers
ushers
422
90
validation engineers
validation engineers
902
4
validation engineer
validation engineers
113
2398
veterinarians
veterinarians
600
19
veterinary assistants and laboratory animal ca...
veterinary assistants and laboratory animal ca...
592
20
veterinary assistants and laboratory animal
veterinary assistants and laboratory animals
426
87
veterinary technologists and technicians
veterinary technologists and technicians
1478
1
vice president of investor relations
vice president of investor relations
981
3
video game designers
video game designers
989
3
vocational education teachers, middle school
vocational education teachers
331
215
vocational education teachers, postsecondary
vocational education teachers
632
16
vocational education teachers, secondary school
vocational education teachers
493
47
vocational education teachers postsecondary
vocational education teachers postsecondarys
636
16
watch repairers
watch repairers
888
4
water and wastewater treatment plant and syste...
water and wastewater treatment plant and syste...
956
3
water/wastewater engineers
water/wastewater engineers
437
81
web administrators
web administrators
1154
2
web developer
web developers
28
15000
web developers
web developers
1032
2
weighers, measurers, checkers, and samplers,
weighers
634
16
welders, cutters, solderers, and brazers
welders
1467
1
welding, soldering, and brazing machine setter...
weldings
1437
1
welding, soldering, and brazing machine setters,
weldings
205
746
wholesale and retail buyers, except farm products
wholesale and retail buyers
1203
1
wind energy engineers
wind energy engineers
1513
1
wind turbine service technicians
wind turbine service technicians
714
10
woodworkers, all other
woodworkers
181
981
writers and authors
writers and authors
237
557
zoologists and wildlife biologists
zoologists and wildlife biologists
1585 rows × 3 columns
In [22]:
reducedf['Name2'] = ""
In [23]:
%%timeit
regex = re.compile('[^a-z\s]')
for index, row in reducedf.iterrows():
reducedf.set_value([index],['Name2'],(regex.sub('', row['Name1'])))
1 loop, best of 3: 918 ms per loop
In [24]:
reducedf.sort_values(['Name1'])
Out[24]:
Count
SOC_NAME
Name1
Name2
1370
1
13-2011.01
13-2011.01s
s
1109
2
15-1121
15-1121s
s
1189
1
15-1132
15-1132s
s
1326
1
15-1199.01 sw quality assurance engnrs & testers
15-1199.01 sw quality assurance engnrs & testers
sw quality assurance engnrs testers
1429
1
15-1199.01
15-1199.01s
s
1337
1
15-1199.08, business intelligence analysts
15-1199.08s
s
1558
1
17-2051
17-2051s
s
1472
1
17-2072
17-2072s
s
1340
1
27-3031
27-3031s
s
1509
1
29-1064.00-obstetricians and gynecologists
29-1064.00-obstetricians and gynecologists
obstetricians and gynecologists
1426
1
<font><font>carpinteros</font></font>
<font><font>carpinteros</font></font>s
fontfontcarpinterosfontfonts
1055
2
able seamen
able seamens
able seamens
74
3958
accountants
accountants
accountants
1046
2
accountant
accountants
accountants
7
52822
accountants and auditors
accountants and auditors
accountants and auditors
565
26
actors
actors
actors
80
3486
actuaries
actuaries
actuaries
1461
1
acupuncturists
acupuncturists
acupuncturists
893
4
adhesive bonding machine operators and tenders
adhesive bonding machine operators and tenders
adhesive bonding machine operators and tenders
823
6
administrative law judges, adjudicators, and h...
administrative law judges
administrative law judges
152
1361
administrative services managers
administrative services managers
administrative services managers
323
230
adult basic and secondary education and litera...
adult basic and secondary education and litera...
adult basic and secondary education and litera...
395
116
adult basic and secondary education and literacy
adult basic and secondary education and literacys
adult basic and secondary education and literacys
1057
2
adult basic second educ and lit teach and instruc
adult basic second educ and lit teach and inst...
adult basic second educ and lit teach and inst...
485
50
adult literacy, remedial education, and ged te...
adult literacys
adult literacys
1484
1
advertising and promortions managers
advertising and promortions managers
advertising and promortions managers
94
2925
advertising and promotions managers
advertising and promotions managers
advertising and promotions managers
1164
2
advertising and promotions manager
advertising and promotions managers
advertising and promotions managers
342
195
advertising sales agents
advertising sales agents
advertising sales agents
351
179
aerospace engineering and operations technicians
aerospace engineering and operations technicians
aerospace engineering and operations technicians
...
...
...
...
...
1555
1
urologists
urologists
urologists
1113
2
ushers, lobby attendants, and ticket takers
ushers
ushers
422
90
validation engineers
validation engineers
validation engineers
902
4
validation engineer
validation engineers
validation engineers
113
2398
veterinarians
veterinarians
veterinarians
600
19
veterinary assistants and laboratory animal ca...
veterinary assistants and laboratory animal ca...
veterinary assistants and laboratory animal ca...
592
20
veterinary assistants and laboratory animal
veterinary assistants and laboratory animals
veterinary assistants and laboratory animals
426
87
veterinary technologists and technicians
veterinary technologists and technicians
veterinary technologists and technicians
1478
1
vice president of investor relations
vice president of investor relations
vice president of investor relations
981
3
video game designers
video game designers
video game designers
989
3
vocational education teachers, middle school
vocational education teachers
vocational education teachers
331
215
vocational education teachers, postsecondary
vocational education teachers
vocational education teachers
632
16
vocational education teachers, secondary school
vocational education teachers
vocational education teachers
493
47
vocational education teachers postsecondary
vocational education teachers postsecondarys
vocational education teachers postsecondarys
636
16
watch repairers
watch repairers
watch repairers
888
4
water and wastewater treatment plant and syste...
water and wastewater treatment plant and syste...
water and wastewater treatment plant and syste...
956
3
water/wastewater engineers
water/wastewater engineers
waterwastewater engineers
437
81
web administrators
web administrators
web administrators
1154
2
web developer
web developers
web developers
28
15000
web developers
web developers
web developers
1032
2
weighers, measurers, checkers, and samplers,
weighers
weighers
634
16
welders, cutters, solderers, and brazers
welders
welders
1467
1
welding, soldering, and brazing machine setter...
weldings
weldings
1437
1
welding, soldering, and brazing machine setters,
weldings
weldings
205
746
wholesale and retail buyers, except farm products
wholesale and retail buyers
wholesale and retail buyers
1203
1
wind energy engineers
wind energy engineers
wind energy engineers
1513
1
wind turbine service technicians
wind turbine service technicians
wind turbine service technicians
714
10
woodworkers, all other
woodworkers
woodworkers
181
981
writers and authors
writers and authors
writers and authors
237
557
zoologists and wildlife biologists
zoologists and wildlife biologists
zoologists and wildlife biologists
1585 rows × 4 columns
In [25]:
reducedf['Name2'].value_counts().count()
Out[25]:
1118
Striping charactors did not help much.
At this point investigated a spellchecker in python. Was not able to get something to work within enviroment.
I am now going to consider removing entries that are unique, with a count of one.
In [26]:
dfName2Check = pd.DataFrame({'Name2': reducedf['Name2'].value_counts().index, 'Count':reducedf['Name2'].value_counts().values})
In [27]:
dfName2Check
Out[27]:
Count
Name2
0
37
software developers
1
14
securities
2
14
secondary school teachers
3
13
sales representatives
4
13
electronics engineers
5
11
computer occupations
6
10
computer software engineers
7
10
elementary school teachers
8
10
computer systems analysts
9
9
mechanical engineers
10
8
s
11
8
special education teachers
12
7
education administrators
13
7
medical scientists
14
7
electrical engineers
15
7
computer programmers
16
7
middle school teachers
17
6
foreign language and literature teachers
18
6
cooks
19
6
civil engineers
20
5
dentists
21
5
computer hardware engineers
22
5
computer network architects
23
5
atmospherics
24
4
electrical and electronics repairers
25
4
compensations
26
4
business operations specialists
27
4
propertys
28
4
criminal justice and law enforcement teachers
29
4
health diagnosing and treating practitioners
...
...
...
1088
1
software develpers
1089
1
mental health and substance abuse social workers
1090
1
nursing assistants
1091
1
mdl schl teachers
1092
1
marine engineers and naval architects
1093
1
market research analysts marketing specialists
1094
1
orthotists and prosthetists
1095
1
softwware developers
1096
1
network and systems administrators
1097
1
it project managers
1098
1
interpreters and translators
1099
1
agricultural and food scientists
1100
1
psychologists
1101
1
respiratory therapy technicians
1102
1
firstline supervisorsmanagers of nonretail sales
1103
1
emergency medical technicians and paramedics
1104
1
public relations and fundraising managers
1105
1
cardiovascular technologists and technicians
1106
1
welders
1107
1
occupational therapy assistants
1108
1
nursing instructors and teachers
1109
1
floral designers
1110
1
residential advisors
1111
1
fundraisers
1112
1
machine feeders and offbearers
1113
1
firstline supervisorsmanagers
1114
1
family practice physicians
1115
1
solutions architects
1116
1
application programmers
1117
1
freight and cargo inspectors
1118 rows × 2 columns
Theres a problem, I am not getting a real representation of the occurence of names in the data. I now need to do something where I can get the actual number of occurences.
In [28]:
cleandata1
Out[28]:
Unnamed: 0
CASE_STATUS
EMPLOYER_NAME
SOC_NAME
JOB_TITLE
FULL_TIME_POSITION
PREVAILING_WAGE
YEAR
WORKSITE
lon
lat
0
1
CERTIFIED-WITHDRAWN
university of michigan
biochemists and biophysicists
postdoctoral research fellow
N
36067.00
2016.0
ANN ARBOR, MICHIGAN
-83.743038
42.280826
1
2
CERTIFIED-WITHDRAWN
goodman networks, inc.
chief executives
chief operating officer
Y
242674.00
2016.0
PLANO, TEXAS
-96.698886
33.019843
2
3
CERTIFIED-WITHDRAWN
ports america group, inc.
chief executives
chief process officer
Y
193066.00
2016.0
JERSEY CITY, NEW JERSEY
-74.077642
40.728158
3
4
CERTIFIED-WITHDRAWN
gates corporation, a wholly-owned subsidiary o...
chief executives
regional presiden, americas
Y
220314.00
2016.0
DENVER, COLORADO
-104.990251
39.739236
4
5
WITHDRAWN
peabody investments corp.
chief executives
president mongolia and india
Y
157518.40
2016.0
ST. LOUIS, MISSOURI
-90.199404
38.627003
5
6
CERTIFIED-WITHDRAWN
burger king corporation
chief executives
executive v p, global development and presiden...
Y
225000.00
2016.0
MIAMI, FLORIDA
-80.191790
25.761680
6
7
CERTIFIED-WITHDRAWN
bt and mk energy and commodities
chief executives
chief operating officer
Y
91021.00
2016.0
HOUSTON, TEXAS
-95.369803
29.760427
7
8
CERTIFIED-WITHDRAWN
globo mobile technologies, inc.
chief executives
chief operations officer
Y
150000.00
2016.0
SAN JOSE, CALIFORNIA
-121.886329
37.338208
8
9
CERTIFIED-WITHDRAWN
esi companies inc.
chief executives
president
Y
127546.00
2016.0
MEMPHIS, TEXAS
NaN
NaN
9
10
WITHDRAWN
lessard international llc
chief executives
president
Y
154648.00
2016.0
VIENNA, VIRGINIA
-77.265260
38.901222
10
11
CERTIFIED-WITHDRAWN
h.j. heinz company
chief executives
chief information officer, heinz north america
Y
182978.00
2016.0
PITTSBURGH, PENNSYLVANIA
-79.995886
40.440625
11
12
CERTIFIED-WITHDRAWN
dow corning corporation
chief executives
vice president and chief human resources officer
Y
163717.00
2016.0
MIDLAND, MICHIGAN
-84.247212
43.615583
12
13
CERTIFIED-WITHDRAWN
acushnet company
chief executives
treasurer and coo
Y
203860.80
2016.0
FAIRHAVEN, MASSACHUSETTS
NaN
NaN
13
14
CERTIFIED-WITHDRAWN
biocair, inc.
chief executives
chief commercial officer
Y
252637.00
2016.0
MIAMI, FLORIDA
-80.191790
25.761680
14
15
CERTIFIED-WITHDRAWN
newmont mining corporation
chief executives
board member
Y
105914.00
2016.0
GREENWOOD VILLAGE, COLORADO
-104.950814
39.617210
15
16
CERTIFIED-WITHDRAWN
vricon, inc.
chief executives
chief financial officer
Y
153046.00
2016.0
STERLING, VIRGINIA
-77.429130
39.006699
16
17
CERTIFIED-WITHDRAWN
cardiac science corporation
financial managers
vice president of finance
Y
90834.00
2016.0
WAUKESHA, WISCONSIN
-88.231481
43.011678
17
18
CERTIFIED-WITHDRAWN
westfield corporation
chief executives
general manager, operations
Y
164050.00
2016.0
LOS ANGELES, CALIFORNIA
-118.243685
34.052234
18
19
CERTIFIED
quicklogix llc
chief executives
ceo
Y
187200.00
2016.0
SANTA CLARA, CALIFORNIA
-121.955236
37.354108
19
20
CERTIFIED
mcchrystal group, llc
chief executives
president, northeast region
Y
241842.00
2016.0
ALEXANDRIA, VIRGINIA
-77.046921
38.804835
20
21
CERTIFIED-WITHDRAWN
cuddle barn, inc.
chief executives
chief operating officer (coo)
Y
117998.00
2016.0
COMMERCE, CALIFORNIA
-118.159793
34.000569
21
22
CERTIFIED-WITHDRAWN
westfield corporation
chief executives
general manager, operations
Y
164050.00
2016.0
LOS ANGELES, CALIFORNIA
-118.243685
34.052234
22
23
CERTIFIED
lomics, llc
chief executives
ceo
Y
99986.00
2016.0
SAN DIEGO, CALIFORNIA
-117.161084
32.715738
23
24
CERTIFIED
uc university high school education inc.
chief executives
chief financial officer
Y
99986.00
2016.0
CHULA VISTA, CALIFORNIA
-117.084196
32.640054
24
25
CERTIFIED-WITHDRAWN
vms communications llc
chief executives
chief operating officer
Y
159370.00
2016.0
MIAMI, FLORIDA
-80.191790
25.761680
25
26
CERTIFIED
quicklogix, inc.
chief executives
ceo
Y
187200.00
2016.0
SANTA CLARA, CALIFORNIA
-121.955236
37.354108
26
27
CERTIFIED-WITHDRAWN
foodessentials corporation
chief executives
chief executive officer
Y
130853.00
2016.0
CHICAGO, ILLINOIS
-87.629798
41.878114
27
28
CERTIFIED
hello inc.
chief executives
chief business officer
Y
215862.00
2016.0
SAN FRANCISCO, CALIFORNIA
-122.419415
37.774929
28
29
CERTIFIED
umbel corp
chief executives
vice president of engineering
Y
192088.00
2016.0
AUSTIN, TEXAS
-97.743061
30.267153
29
30
CERTIFIED
perspectives of freedom foundation, inc
chief executives
executive director
Y
95295.98
2016.0
WESTON, FLORIDA
-80.399775
26.100365
...
...
...
...
...
...
...
...
...
...
...
...
3002428
3002429
WITHDRAWN
avant healthcare professionals
physical therapists
physical therapist
Y
53601.60
2011.0
LEBANON, PENNSYLVANIA
NaN
NaN
3002429
3002430
WITHDRAWN
avant healthcare professionals
physical therapists
physical therapist
Y
53601.60
2011.0
LEBANON, PENNSYLVANIA
NaN
NaN
3002430
3002431
WITHDRAWN
trisync technologies, inc.
computer systems analysts
computer system analyst
Y
55245.00
2011.0
EDISON, NEW JERSEY
-74.412095
40.518715
3002431
3002432
WITHDRAWN
at last sportswear inc.
computer support specialists
computer support specialist
Y
36837.00
2011.0
SECAUCUS, NEW JERSEY
-74.056530
40.789545
3002432
3002433
WITHDRAWN
trisync technologies, inc.
computer systems analysts
computer system analyst
Y
55245.00
2011.0
EDISON, NEW JERSEY
-74.412095
40.518715
3002433
3002434
WITHDRAWN
the university of texas southwestern medical c...
biochemists and biophysicists
instructor
Y
36795.00
2011.0
DALLAS, TEXAS
-96.796988
32.776664
3002434
3002435
WITHDRAWN
trisync technologies, inc.
computer systems analysts
computer system analyst
Y
55245.00
2011.0
EDISON, NEW JERSEY
-74.412095
40.518715
3002435
3002436
WITHDRAWN
marlabs, inc
computer programmers
programmer/analyst
Y
77730.00
2011.0
DURHAM, NORTH CAROLINA
-78.898619
35.994033
3002436
3002437
WITHDRAWN
xtron software services, inc.
computer programmers
computer programmers
Y
89232.00
2011.0
SANTA CLARA, CALIFORNIA
-121.955236
37.354108
3002437
3002438
WITHDRAWN
university of mississippi medical center
health specialties teachers, postsecondary
assistant professor of anesthesiology
Y
34510.00
2011.0
JACKSON, MISSISSIPPI
-90.184810
32.298757
3002438
3002439
WITHDRAWN
canvas infotech, inc.
database administrators
data analyst
Y
53082.00
2011.0
PLEASANTON, CALIFORNIA
-121.874679
37.662431
3002439
3002440
WITHDRAWN
new york university
biological scientists, all other
adjunct associate professor
Y
37336.00
2011.0
NEW YORK, NEW YORK
-74.005941
40.712784
3002440
3002441
WITHDRAWN
oracle america, inc.
software developers, applications
software engineer (software developer 2)
Y
64800.00
2011.0
REDWOOD SHORES, CALIFORNIA
-122.245536
37.536413
3002441
3002442
WITHDRAWN
b & d dental corp.
materials scientists
materials scientist
N
70553.60
2011.0
WEST VALLEY, UTAH
-112.001050
40.691613
3002442
3002443
WITHDRAWN
medtech staffing & solutions, inc
market research analysts and marketing special...
market research analyst
Y
33800.00
2011.0
AKRON, OHIO
-81.519005
41.081445
3002443
3002444
WITHDRAWN
larsen & toubro limited
commercial and industrial designers
design engineer
Y
59800.00
2011.0
CHELMSFORD, MASSACHUSETTS
-71.367284
42.599814
3002444
3002445
WITHDRAWN
larsen & toubro limited
commercial and industrial designers
design engineer
Y
59800.00
2011.0
CHELMSFORD, MASSACHUSETTS
-71.367284
42.599814
3002445
3002446
NaN
NaN
NaN
NaN
NaN
NaN
NaN
BERKLEY HEIGHTS, NEW JERSEY
-74.431052
40.680873
3002446
3002447
NaN
NaN
NaN
NaN
NaN
NaN
NaN
SCHENECTADY , NEW YORK
-73.939569
42.814243
3002447
3002448
NaN
NaN
NaN
NaN
NaN
NaN
NaN
MOUTAIN VIEW, CALIFORNIA
-122.083851
37.386052
3002448
3002449
NaN
NaN
NaN
NaN
NaN
NaN
NaN
ST.PAUL, MINNESOTA
-93.089958
44.953703
3002449
3002450
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NEW TOWN, PENNSYLVANIA
-74.932260
40.228337
3002450
3002451
NaN
NaN
NaN
NaN
NaN
NaN
NaN
WESTMINISTER, COLORADO
-105.037205
39.836653
3002451
3002452
NaN
NaN
NaN
NaN
NaN
NaN
NaN
FREEMONT, CALIFORNIA
-121.988572
37.548270
3002452
3002453
NaN
NaN
NaN
NaN
NaN
NaN
NaN
LAVERGNE, TENNESSEE
-86.581939
36.015618
3002453
3002454
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NYC, NEW YORK
-74.005941
40.712784
3002454
3002455
NaN
NaN
NaN
NaN
NaN
NaN
NaN
SOUTH LAKE, TEXAS
-97.134178
32.941236
3002455
3002456
NaN
NaN
NaN
NaN
NaN
NaN
NaN
CLINTON, NEW JERSEY
-74.909890
40.636768
3002456
3002457
NaN
NaN
NaN
NaN
NaN
NaN
NaN
OWINGS MILL, MARYLAND
-76.780253
39.419550
3002457
3002458
NaN
NaN
NaN
NaN
NaN
NaN
NaN
ALTANTA, GEORGIA
-84.387982
33.748995
3002458 rows × 11 columns
Going to go back to this df and create a copy. Will then over write the Soc_Name with the reduced name I have.
In [29]:
cleandata2 = cleandata1.copy()
In [31]:
%%time
name = cleandata2.iloc[3002440]['SOC_NAME'] #example of accessing a location
print(name)
newname = reducedf.loc[(reducedf['SOC_NAME']==name)]
newname1 = newname.iloc[0]['Name2']
print(newname1)
software developers, applications
software developers
Wall time: 2.52 ms
Test code to test algorithm
At this point of development, I have switched to running the python notebook locally. I am having trouble with how slow the cloud service is, and I am not using spark yet.
In [38]:
%%timeit
errormap = defaultdict(list)
for index, row in cleandata2.iterrows():
name = row['SOC_NAME']
newname = reducedf.loc[(reducedf['SOC_NAME']==name)]
try:
newname1 = newname.iloc[0]['Name2']
cleandata2.set_value([index],['SOC_NAME'],newname1)
except:
quicklist = ["Something went wrong", name, newname]
errormap[index].append(quicklist)
break
1 loop, best of 3: 1.03 s per loop
Here, we try to apply what we have reduced to the dataframe. We will see if we have made any progress, and what we can do next.
However, I have a very VERY large problem. The logic to use the reduced names, takes 1 s. With about 3000000 rows, that would be about 35 days.
I have no idea how to get around this right now.
I think I might be able to do this, by converting the pandas dataframe into a spark rdd. From there I could user map and map reduce functions.
But I would need a lot more time in order to do this.
At this point. I have spent many hours on this.
I feel very defeated.
I am now going to see if there is some way I can streamline the algoritm.
In [48]:
%%timeit
for index, row in cleandata2.iterrows():
try:
cleandata2.set_value([index],['SOC_NAME'],reducedf.loc[(reducedf['SOC_NAME']==row['SOC_NAME'])].iloc[0]['Name2'])
except:
pass
break
1 loop, best of 3: 997 ms per loop
Very little gains. I don't know what to do. We may have to switch over in entire to rapidminer.
In [37]:
cleandata2['SOC_NAME'].value_counts().count()
cleandata2['SOC_NAME'].value_counts()
Out[37]:
computer systems analysts 506523
software developers, applications 414716
computer programmers 398546
computer occupations, all other 177870
software developers, systems software 83856
management analysts 66873
financial analysts 53540
accountants and auditors 52822
mechanical engineers 44236
network and computer systems administrators 40087
database administrators 38982
market research analysts and marketing specialists 37737
electronics engineers, except computer 36574
operations research analysts 34260
electrical engineers 34108
physicians and surgeons, all other 33526
computer software engineers, applications 33387
computer and information systems managers 27536
computer occupations, all other* 26254
medical scientists, except epidemiologists 26159
physical therapists 21994
biochemists and biophysicists 21245
industrial engineers 19370
computer systems analyst 17426
statisticians 17101
biological scientists, all other 16367
marketing managers 16310
civil engineers 15970
web developers 15000
internists, general 13367
...
urologists 1
financial analysis 1
commerical and industrial designers 1
17-2051 1
training and development manager 1
job printers 1
occupational health and safety specialists and tec 1
finance managers 1
electonics engineers, except computer 1
industrial designers 1
software developers, applications, non r&d 1
property real estate & community association mgrs 1
business systems analysts 1
secondary school teachers, except special and vocational education 1
chemist 1
health speciality teacher 1
pediatrician 1
computer systems engineers/arquitects 1
medical and clincial laboratory technologists 1
computer systems engineers/architect 1
lodging manager 1
new accounts clerks 1
designer, all other 1
atmospheric, earth, marine, & space sciences teach 1
network & computer systems administrator 1
foreign language and literature teachers, post sec 1
computer occuptations, all other 1
engineering teachers postsecondary 1
cashiers 1
business intelligence anaylsts 1
Name: SOC_NAME, dtype: int64
Debating what will happen if I remove all entries in the dataset with which a SOC_NAME that only occures once. How many entries will this remove? Can we ignore entries like this? What about entries with an occurence count of 2? 3? Do these entries matter? What do we do if they matter?
At this point, I will have to do as much as I can in python, and then switch over to rapidminer.
I have an idea on how I could go through the rows faster.
In [ ]:
%%timeit
num=3000000
i=1
while i < num:
try:
name = cleandata2.iloc[i]['SOC_NAME']
newname = reducedf.loc[(reducedf['SOC_NAME']==name)].iloc[0]['Name2']
#cleandata2.set_value(i,['SOC_NAME'], newname)
i=i+1
except:
pass
its the setting of the values that takes the majority of the time.
In [ ]:
In [ ]:
In [6]:
df_data_2
Out[6]:
Unnamed: 0
CASE_STATUS
EMPLOYER_NAME
SOC_NAME
JOB_TITLE
FULL_TIME_POSITION
PREVAILING_WAGE
YEAR
WORKSITE
lon
lat
0
1
CERTIFIED-WITHDRAWN
UNIVERSITY OF MICHIGAN
BIOCHEMISTS AND BIOPHYSICISTS
POSTDOCTORAL RESEARCH FELLOW
N
36067.00
2016.0
ANN ARBOR, MICHIGAN
-83.743038
42.280826
1
2
CERTIFIED-WITHDRAWN
GOODMAN NETWORKS, INC.
CHIEF EXECUTIVES
CHIEF OPERATING OFFICER
Y
242674.00
2016.0
PLANO, TEXAS
-96.698886
33.019843
2
3
CERTIFIED-WITHDRAWN
PORTS AMERICA GROUP, INC.
CHIEF EXECUTIVES
CHIEF PROCESS OFFICER
Y
193066.00
2016.0
JERSEY CITY, NEW JERSEY
-74.077642
40.728158
3
4
CERTIFIED-WITHDRAWN
GATES CORPORATION, A WHOLLY-OWNED SUBSIDIARY O...
CHIEF EXECUTIVES
REGIONAL PRESIDEN, AMERICAS
Y
220314.00
2016.0
DENVER, COLORADO
-104.990251
39.739236
4
5
WITHDRAWN
PEABODY INVESTMENTS CORP.
CHIEF EXECUTIVES
PRESIDENT MONGOLIA AND INDIA
Y
157518.40
2016.0
ST. LOUIS, MISSOURI
-90.199404
38.627003
5
6
CERTIFIED-WITHDRAWN
BURGER KING CORPORATION
CHIEF EXECUTIVES
EXECUTIVE V P, GLOBAL DEVELOPMENT AND PRESIDEN...
Y
225000.00
2016.0
MIAMI, FLORIDA
-80.191790
25.761680
6
7
CERTIFIED-WITHDRAWN
BT AND MK ENERGY AND COMMODITIES
CHIEF EXECUTIVES
CHIEF OPERATING OFFICER
Y
91021.00
2016.0
HOUSTON, TEXAS
-95.369803
29.760427
7
8
CERTIFIED-WITHDRAWN
GLOBO MOBILE TECHNOLOGIES, INC.
CHIEF EXECUTIVES
CHIEF OPERATIONS OFFICER
Y
150000.00
2016.0
SAN JOSE, CALIFORNIA
-121.886329
37.338208
8
9
CERTIFIED-WITHDRAWN
ESI COMPANIES INC.
CHIEF EXECUTIVES
PRESIDENT
Y
127546.00
2016.0
MEMPHIS, TEXAS
NaN
NaN
9
10
WITHDRAWN
LESSARD INTERNATIONAL LLC
CHIEF EXECUTIVES
PRESIDENT
Y
154648.00
2016.0
VIENNA, VIRGINIA
-77.265260
38.901222
10
11
CERTIFIED-WITHDRAWN
H.J. HEINZ COMPANY
CHIEF EXECUTIVES
CHIEF INFORMATION OFFICER, HEINZ NORTH AMERICA
Y
182978.00
2016.0
PITTSBURGH, PENNSYLVANIA
-79.995886
40.440625
11
12
CERTIFIED-WITHDRAWN
DOW CORNING CORPORATION
CHIEF EXECUTIVES
VICE PRESIDENT AND CHIEF HUMAN RESOURCES OFFICER
Y
163717.00
2016.0
MIDLAND, MICHIGAN
-84.247212
43.615583
12
13
CERTIFIED-WITHDRAWN
ACUSHNET COMPANY
CHIEF EXECUTIVES
TREASURER AND COO
Y
203860.80
2016.0
FAIRHAVEN, MASSACHUSETTS
NaN
NaN
13
14
CERTIFIED-WITHDRAWN
BIOCAIR, INC.
CHIEF EXECUTIVES
CHIEF COMMERCIAL OFFICER
Y
252637.00
2016.0
MIAMI, FLORIDA
-80.191790
25.761680
14
15
CERTIFIED-WITHDRAWN
NEWMONT MINING CORPORATION
CHIEF EXECUTIVES
BOARD MEMBER
Y
105914.00
2016.0
GREENWOOD VILLAGE, COLORADO
-104.950814
39.617210
15
16
CERTIFIED-WITHDRAWN
VRICON, INC.
CHIEF EXECUTIVES
CHIEF FINANCIAL OFFICER
Y
153046.00
2016.0
STERLING, VIRGINIA
-77.429130
39.006699
16
17
CERTIFIED-WITHDRAWN
CARDIAC SCIENCE CORPORATION
FINANCIAL MANAGERS
VICE PRESIDENT OF FINANCE
Y
90834.00
2016.0
WAUKESHA, WISCONSIN
-88.231481
43.011678
17
18
CERTIFIED-WITHDRAWN
WESTFIELD CORPORATION
CHIEF EXECUTIVES
GENERAL MANAGER, OPERATIONS
Y
164050.00
2016.0
LOS ANGELES, CALIFORNIA
-118.243685
34.052234
18
19
CERTIFIED
QUICKLOGIX LLC
CHIEF EXECUTIVES
CEO
Y
187200.00
2016.0
SANTA CLARA, CALIFORNIA
-121.955236
37.354108
19
20
CERTIFIED
MCCHRYSTAL GROUP, LLC
CHIEF EXECUTIVES
PRESIDENT, NORTHEAST REGION
Y
241842.00
2016.0
ALEXANDRIA, VIRGINIA
-77.046921
38.804835
20
21
CERTIFIED-WITHDRAWN
CUDDLE BARN, INC.
CHIEF EXECUTIVES
CHIEF OPERATING OFFICER (COO)
Y
117998.00
2016.0
COMMERCE, CALIFORNIA
-118.159793
34.000569
21
22
CERTIFIED-WITHDRAWN
WESTFIELD CORPORATION
CHIEF EXECUTIVES
GENERAL MANAGER, OPERATIONS
Y
164050.00
2016.0
LOS ANGELES, CALIFORNIA
-118.243685
34.052234
22
23
CERTIFIED
LOMICS, LLC
CHIEF EXECUTIVES
CEO
Y
99986.00
2016.0
SAN DIEGO, CALIFORNIA
-117.161084
32.715738
23
24
CERTIFIED
UC UNIVERSITY HIGH SCHOOL EDUCATION INC.
CHIEF EXECUTIVES
CHIEF FINANCIAL OFFICER
Y
99986.00
2016.0
CHULA VISTA, CALIFORNIA
-117.084196
32.640054
24
25
CERTIFIED-WITHDRAWN
VMS COMMUNICATIONS LLC
CHIEF EXECUTIVES
CHIEF OPERATING OFFICER
Y
159370.00
2016.0
MIAMI, FLORIDA
-80.191790
25.761680
25
26
CERTIFIED
QUICKLOGIX, INC.
CHIEF EXECUTIVES
CEO
Y
187200.00
2016.0
SANTA CLARA, CALIFORNIA
-121.955236
37.354108
26
27
CERTIFIED-WITHDRAWN
FOODESSENTIALS CORPORATION
CHIEF EXECUTIVES
CHIEF EXECUTIVE OFFICER
Y
130853.00
2016.0
CHICAGO, ILLINOIS
-87.629798
41.878114
27
28
CERTIFIED
HELLO INC.
CHIEF EXECUTIVES
CHIEF BUSINESS OFFICER
Y
215862.00
2016.0
SAN FRANCISCO, CALIFORNIA
-122.419415
37.774929
28
29
CERTIFIED
UMBEL CORP
CHIEF EXECUTIVES
VICE PRESIDENT OF ENGINEERING
Y
192088.00
2016.0
AUSTIN, TEXAS
-97.743061
30.267153
29
30
CERTIFIED
PERSPECTIVES OF FREEDOM FOUNDATION, INC
CHIEF EXECUTIVES
EXECUTIVE DIRECTOR
Y
95295.98
2016.0
WESTON, FLORIDA
-80.399775
26.100365
...
...
...
...
...
...
...
...
...
...
...
...
3002428
3002429
WITHDRAWN
AVANT HEALTHCARE PROFESSIONALS
Physical Therapists
PHYSICAL THERAPIST
Y
53601.60
2011.0
LEBANON, PENNSYLVANIA
NaN
NaN
3002429
3002430
WITHDRAWN
AVANT HEALTHCARE PROFESSIONALS
Physical Therapists
PHYSICAL THERAPIST
Y
53601.60
2011.0
LEBANON, PENNSYLVANIA
NaN
NaN
3002430
3002431
WITHDRAWN
TRISYNC TECHNOLOGIES, INC.
Computer Systems Analysts
COMPUTER SYSTEM ANALYST
Y
55245.00
2011.0
EDISON, NEW JERSEY
-74.412095
40.518715
3002431
3002432
WITHDRAWN
AT LAST SPORTSWEAR INC.
Computer Support Specialists
COMPUTER SUPPORT SPECIALIST
Y
36837.00
2011.0
SECAUCUS, NEW JERSEY
-74.056530
40.789545
3002432
3002433
WITHDRAWN
TRISYNC TECHNOLOGIES, INC.
Computer Systems Analysts
COMPUTER SYSTEM ANALYST
Y
55245.00
2011.0
EDISON, NEW JERSEY
-74.412095
40.518715
3002433
3002434
WITHDRAWN
THE UNIVERSITY OF TEXAS SOUTHWESTERN MEDICAL C...
Biochemists and Biophysicists
INSTRUCTOR
Y
36795.00
2011.0
DALLAS, TEXAS
-96.796988
32.776664
3002434
3002435
WITHDRAWN
TRISYNC TECHNOLOGIES, INC.
Computer Systems Analysts
COMPUTER SYSTEM ANALYST
Y
55245.00
2011.0
EDISON, NEW JERSEY
-74.412095
40.518715
3002435
3002436
WITHDRAWN
MARLABS, INC
Computer Programmers
PROGRAMMER/ANALYST
Y
77730.00
2011.0
DURHAM, NORTH CAROLINA
-78.898619
35.994033
3002436
3002437
WITHDRAWN
XTRON SOFTWARE SERVICES, INC.
Computer Programmers
COMPUTER PROGRAMMERS
Y
89232.00
2011.0
SANTA CLARA, CALIFORNIA
-121.955236
37.354108
3002437
3002438
WITHDRAWN
UNIVERSITY OF MISSISSIPPI MEDICAL CENTER
Health Specialties Teachers, Postsecondary
ASSISTANT PROFESSOR OF ANESTHESIOLOGY
Y
34510.00
2011.0
JACKSON, MISSISSIPPI
-90.184810
32.298757
3002438
3002439
WITHDRAWN
CANVAS INFOTECH, INC.
Database Administrators
DATA ANALYST
Y
53082.00
2011.0
PLEASANTON, CALIFORNIA
-121.874679
37.662431
3002439
3002440
WITHDRAWN
NEW YORK UNIVERSITY
Biological Scientists, All Other
ADJUNCT ASSOCIATE PROFESSOR
Y
37336.00
2011.0
NEW YORK, NEW YORK
-74.005941
40.712784
3002440
3002441
WITHDRAWN
ORACLE AMERICA, INC.
Software Developers, Applications
SOFTWARE ENGINEER (SOFTWARE DEVELOPER 2)
Y
64800.00
2011.0
REDWOOD SHORES, CALIFORNIA
-122.245536
37.536413
3002441
3002442
WITHDRAWN
B & D DENTAL CORP.
Materials Scientists
MATERIALS SCIENTIST
N
70553.60
2011.0
WEST VALLEY, UTAH
-112.001050
40.691613
3002442
3002443
WITHDRAWN
MEDTECH STAFFING & SOLUTIONS, INC
Market Research Analysts and Marketing Special...
MARKET RESEARCH ANALYST
Y
33800.00
2011.0
AKRON, OHIO
-81.519005
41.081445
3002443
3002444
WITHDRAWN
LARSEN & TOUBRO LIMITED
Commercial and Industrial Designers
DESIGN ENGINEER
Y
59800.00
2011.0
CHELMSFORD, MASSACHUSETTS
-71.367284
42.599814
3002444
3002445
WITHDRAWN
LARSEN & TOUBRO LIMITED
Commercial and Industrial Designers
DESIGN ENGINEER
Y
59800.00
2011.0
CHELMSFORD, MASSACHUSETTS
-71.367284
42.599814
3002445
3002446
NaN
NaN
NaN
NaN
NaN
NaN
NaN
BERKLEY HEIGHTS, NEW JERSEY
-74.431052
40.680873
3002446
3002447
NaN
NaN
NaN
NaN
NaN
NaN
NaN
SCHENECTADY , NEW YORK
-73.939569
42.814243
3002447
3002448
NaN
NaN
NaN
NaN
NaN
NaN
NaN
MOUTAIN VIEW, CALIFORNIA
-122.083851
37.386052
3002448
3002449
NaN
NaN
NaN
NaN
NaN
NaN
NaN
ST.PAUL, MINNESOTA
-93.089958
44.953703
3002449
3002450
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NEW TOWN, PENNSYLVANIA
-74.932260
40.228337
3002450
3002451
NaN
NaN
NaN
NaN
NaN
NaN
NaN
WESTMINISTER, COLORADO
-105.037205
39.836653
3002451
3002452
NaN
NaN
NaN
NaN
NaN
NaN
NaN
FREEMONT, CALIFORNIA
-121.988572
37.548270
3002452
3002453
NaN
NaN
NaN
NaN
NaN
NaN
NaN
LAVERGNE, TENNESSEE
-86.581939
36.015618
3002453
3002454
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NYC, NEW YORK
-74.005941
40.712784
3002454
3002455
NaN
NaN
NaN
NaN
NaN
NaN
NaN
SOUTH LAKE, TEXAS
-97.134178
32.941236
3002455
3002456
NaN
NaN
NaN
NaN
NaN
NaN
NaN
CLINTON, NEW JERSEY
-74.909890
40.636768
3002456
3002457
NaN
NaN
NaN
NaN
NaN
NaN
NaN
OWINGS MILL, MARYLAND
-76.780253
39.419550
3002457
3002458
NaN
NaN
NaN
NaN
NaN
NaN
NaN
ALTANTA, GEORGIA
-84.387982
33.748995
3002458 rows × 11 columns
In [3]:
lon = []
lon = df_data_2['lon'].values,df_data_2['lat'].values
lon
Out[3]:
(array([-83.7430378, -96.6988856, -74.0776417, ..., -74.90989 ,
-76.7802528, -84.3879824]),
array([ 42.2808256, 33.0198431, 40.7281575, ..., 40.6367682,
39.4195499, 33.7489954]))
In [14]:
#for long, lat in df_data_2['lon'].values for in df_data_2['lat'].values
geo = [list(a) if a!=['nan', 'nan'] for a in zip(df_data_2['lon'].values,df_data_2['lat'].values)]
File "<ipython-input-14-3d1e6af45427>", line 2
geo = [list(a) if a!=['nan', 'nan'] for a in zip(df_data_2['lon'].values,df_data_2['lat'].values)]
^
SyntaxError: invalid syntax
In [13]:
geo
Out[13]:
[[-83.743037799999996, 42.2808256],
[-96.698885599999997, 33.019843100000003],
[-74.077641700000001, 40.728157500000002],
[-104.990251, 39.739235799999996],
[-90.199404200000004, 38.627002500000003],
[-80.1917902, 25.7616798],
[-95.369802799999988, 29.7604267],
[-121.88632859999998, 37.338208200000004],
[nan, nan],
[-77.265260400000003, 38.901222499999996],
[-79.995886400000003, 40.440624799999995],
[-84.247211599999986, 43.615582500000002],
[nan, nan],
[-80.1917902, 25.7616798],
[-104.95081409999999, 39.617210100000001],
[-77.429129799999998, 39.006699299999994],
[-88.231481299999999, 43.011678400000001],
[-118.24368490000002, 34.052234200000001],
[-121.95523559999999, 37.354107899999995],
[-77.046921400000002, 38.804835499999996],
[-118.1597929, 34.0005691],
[-118.24368490000002, 34.052234200000001],
[-117.1610838, 32.715738000000002],
[-117.08419550000001, 32.6400541],
[-80.1917902, 25.7616798],
[-121.95523559999999, 37.354107899999995],
[-87.62979820000001, 41.878113599999999],
[-122.4194155, 37.774929499999999],
[-97.743060799999995, 30.267153000000004],
[-80.399774800000003, 26.100365399999998],
[-77.036870700000009, 38.907192299999998],
[-87.62979820000001, 41.878113599999999],
[-78.825562099999999, 35.823483000000003],
[-80.1917902, 25.7616798],
[-71.441810099999998, 41.911012299999996],
[-87.62979820000001, 41.878113599999999],
[-87.62979820000001, 41.878113599999999],
[-81.453449200000009, 41.605326599999998],
[-88.17340209999999, 41.817807000000002],
[-122.21706609999998, 47.482877600000002],
[-84.55553470000001, 42.732534999999999],
[-87.828954799999991, 42.127526700000004],
[-84.294089900000003, 34.075376200000008],
[-117.27114809999999, 32.991154999999999],
[-74.005941299999989, 40.712783700000003],
[nan, nan],
[-74.185420899999997, 40.806754600000005],
[-117.1610838, 32.715738000000002],
[-121.98857190000001, 37.548269700000006],
[-117.1610838, 32.715738000000002],
[-95.824395599999988, 29.785785300000001],
[-95.369802799999988, 29.7604267],
[-84.198579000000009, 34.028925899999997],
[-122.21706609999998, 47.482877600000002],
[nan, nan],
[nan, nan],
[-121.98857190000001, 37.548269700000006],
[nan, nan],
[-97.497483799999998, 25.901747199999999],
[-78.476678100000001, 38.029305900000004],
[-122.4194155, 37.774929499999999],
[-86.908065500000006, 40.425868600000001],
[-76.612189299999997, 39.290384799999998],
[-80.09420870000001, 26.934224600000004],
[-80.09420870000001, 26.934224600000004],
[-81.317844600000001, 28.758883299999997],
[-84.549932699999999, 33.952601999999999],
[-81.365624200000013, 28.661108899999999],
[-78.476678100000001, 38.029305900000004],
[-122.7140548, 38.440428999999995],
[-77.008587599999998, 40.214256499999998],
[-122.7140548, 38.440428999999995],
[-96.948894499999994, 32.814017699999994],
[-83.352709700000005, 42.368369999999999],
[-73.756231700000001, 42.652579299999992],
[-81.794810299999995, 26.142035800000002],
[-81.379236500000005, 28.538335499999999],
[-122.27111370000002, 37.804363700000003],
[-117.1610838, 32.715738000000002],
[-80.278105699999998, 25.857596300000001],
[-80.278105699999998, 25.857596300000001],
[-87.62979820000001, 41.878113599999999],
[-87.62979820000001, 41.878113599999999],
[-76.8620327, 38.968511200000002],
[-90.577067499999998, 38.663108299999998],
[-76.8620327, 38.968511200000002],
[-80.355330199999997, 25.8195424],
[-81.379236500000005, 28.538335499999999],
[-80.1917902, 25.7616798],
[-84.512019600000002, 39.103118200000004],
[-118.28169299999999, 33.831674499999998],
[nan, nan],
[nan, nan],
[-121.42522269999999, 37.739651299999998],
[-71.144773200000003, 42.158432399999995],
[-87.62979820000001, 41.878113599999999],
[-115.13982959999998, 36.169941200000004],
[-78.476678100000001, 38.029305900000004],
[-82.719267099999996, 28.244176799999998],
[nan, nan],
[-82.285924699999995, 27.937801],
[-118.40035630000001, 34.073620399999996],
[-74.005941299999989, 40.712783700000003],
[-80.296255500000001, 26.007764999999999],
[-118.4911912, 34.019454299999992],
[-87.62979820000001, 41.878113599999999],
[-87.62979820000001, 41.878113599999999],
[-77.357002799999989, 38.958630700000001],
[nan, nan],
[-72.251756900000004, 43.6422934],
[-74.005941299999989, 40.712783700000003],
[-80.1917902, 25.7616798],
[-91.140319599999998, 30.458282899999997],
[-74.005941299999989, 40.712783700000003],
[-87.828954799999991, 42.127526700000004],
[-85.587228600000003, 42.291706899999994],
[-82.719267099999996, 28.244176799999998],
[-74.005941299999989, 40.712783700000003],
[-80.278105699999998, 25.857596300000001],
[-84.387982399999999, 33.748995399999998],
[-122.272747, 37.871592600000007],
[-77.036870700000009, 38.907192299999998],
[-118.30896240000001, 33.888348700000002],
[-93.470786000000004, 44.854685600000003],
[-93.470786000000004, 44.854685600000003],
[-80.752607999999995, 32.216315999999999],
[-80.843126699999999, 35.227086900000003],
[-111.9260519, 33.494170399999994],
[-122.272747, 37.871592600000007],
[-122.20598329999999, 47.676892700000003],
[-119.17705159999998, 34.197504799999997],
[-71.228964099999999, 42.443037200000006],
[-83.234102799999988, 42.687532300000001],
[nan, nan],
[nan, nan],
[-71.058880099999996, 42.360082500000004],
[nan, nan],
[-74.005941299999989, 40.712783700000003],
[-77.944710200000003, 34.225725500000003],
[-74.005941299999989, 40.712783700000003],
[-80.053374599999998, 26.715342400000001],
[-122.33207079999998, 47.606209499999999],
[-80.1917902, 25.7616798],
[-77.429993899999999, 43.212285100000003],
[-82.665099200000014, 28.034184700000001],
[nan, nan],
[-83.221873099999996, 42.473368799999996],
[-104.990251, 39.739235799999996],
[nan, nan],
[-84.269644900000003, 36.010356099999996],
[-80.25659499999999, 26.166971100000001],
[-122.08385109999999, 37.386051700000003],
[-117.1610838, 32.715738000000002],
[-71.058880099999996, 42.360082500000004],
[-85.179714199999992, 42.321152200000007],
[-74.011653600000002, 40.893246900000001],
[-115.13982959999998, 36.169941200000004],
[-122.4442906, 47.252876799999996],
[-95.369802799999988, 29.7604267],
[-77.201370499999996, 39.143440600000005],
[-84.514376099999993, 33.883992600000006],
[-81.379236500000005, 28.538335499999999],
[-80.1917902, 25.7616798],
[-84.213530899999995, 33.941212700000001],
[-122.27111370000002, 37.804363700000003],
[-80.1917902, 25.7616798],
[-77.036870700000009, 38.907192299999998],
[-104.7091322, 40.423314200000007],
[-80.1917902, 25.7616798],
[-118.24368490000002, 34.052234200000001],
[-97.195013799999998, 32.991234999999996],
[-122.08385109999999, 37.386051700000003],
[-74.005941299999989, 40.712783700000003],
[-122.1817252, 37.452959800000002],
[-75.165221500000001, 39.9525839],
[-71.058880099999996, 42.360082500000004],
[-117.086421, 33.119206800000001],
[-122.4194155, 37.774929499999999],
[-86.868889900000013, 35.925063700000003],
[-90.577067499999998, 38.663108299999998],
[-119.17705159999998, 34.197504799999997],
[-117.79469420000001, 33.6839473],
[-83.234102799999988, 42.687532300000001],
[-119.17705159999998, 34.197504799999997],
[-122.1817252, 37.452959800000002],
[-83.352709700000005, 42.368369999999999],
[-104.7091322, 40.423314200000007],
[nan, nan],
[nan, nan],
[nan, nan],
[-118.24368490000002, 34.052234200000001],
[-83.149775099999999, 42.606409499999998],
[-118.24368490000002, 34.052234200000001],
[-157.8583333, 21.306944399999999],
[-118.24368490000002, 34.052234200000001],
[-86.908065500000006, 40.425868600000001],
[-84.294089900000003, 34.075376200000008],
[nan, nan],
[-111.65853370000001, 40.233843799999995],
[-80.579510999999997, 35.408751700000003],
[-84.213530899999995, 33.941212700000001],
[-111.9738304, 41.222999999999999],
[-77.152757800000003, 39.0839973],
[nan, nan],
[-75.539787799999999, 39.739072100000001],
[-80.1917902, 25.7616798],
[-80.148379000000006, 25.981202399999997],
[-80.148379000000006, 25.981202399999997],
[-76.945530099999999, 38.955944200000005],
[-104.990251, 39.739235799999996],
[nan, nan],
[-119.69819009999999, 34.420830500000001],
[-64.703197700000004, 17.746639699999999],
[-71.058880099999996, 42.360082500000004],
[-95.235250099999988, 38.971668900000004],
[-118.4911912, 34.019454299999992],
[-79.995886400000003, 40.440624799999995],
[-80.1917902, 25.7616798],
[-118.24368490000002, 34.052234200000001],
[-80.1917902, 25.7616798],
[-80.137317400000015, 26.122438600000002],
[-86.518604500000009, 35.982841200000003],
[-86.518604500000009, 35.982841200000003],
[-79.037738799999985, 43.0962143],
[-86.518604500000009, 35.982841200000003],
[-117.1610838, 32.715738000000002],
[-71.058880099999996, 42.360082500000004],
[-117.08419550000001, 32.6400541],
[-86.518604500000009, 35.982841200000003],
[-122.4194155, 37.774929499999999],
[-81.099834200000004, 32.0835407],
[nan, nan],
[-86.158068, 39.768402999999999],
[-95.235250099999988, 38.971668900000004],
[-111.65853370000001, 40.233843799999995],
[-111.65853370000001, 40.233843799999995],
[-95.369802799999988, 29.7604267],
[-111.65853370000001, 40.233843799999995],
[-111.65853370000001, 40.233843799999995],
[nan, nan],
[-89.401230200000001, 43.073051700000008],
[nan, nan],
[-95.369802799999988, 29.7604267],
[nan, nan],
[-80.1917902, 25.7616798],
[-118.4911912, 34.019454299999992],
[nan, nan],
[-111.65853370000001, 40.233843799999995],
[-97.497483799999998, 25.901747199999999],
[nan, nan],
[-111.65853370000001, 40.233843799999995],
[nan, nan],
[-80.139212099999995, 25.956481199999999],
[-86.158068, 39.768402999999999],
[-85.668086299999999, 42.9633599],
[-93.99939959999999, 44.163577500000002],
[-96.948894499999994, 32.814017699999994],
[-87.894522899999998, 42.698074900000002],
[nan, nan],
[-74.364612199999996, 40.714637599999996],
[-77.475266700000006, 38.750948799999996],
[-77.036870700000009, 38.907192299999998],
[-80.1917902, 25.7616798],
[-87.62979820000001, 41.878113599999999],
[-84.387982399999999, 33.748995399999998],
[-80.843126699999999, 35.227086900000003],
[-87.62979820000001, 41.878113599999999],
[-84.512019600000002, 39.103118200000004],
[-117.1610838, 32.715738000000002],
[-84.387982399999999, 33.748995399999998],
[-87.62979820000001, 41.878113599999999],
[nan, nan],
[-91.140319599999998, 30.458282899999997],
[-74.364612199999996, 40.714637599999996],
[-87.62979820000001, 41.878113599999999],
[-73.8201337, 41.055096899999995],
[-84.512019600000002, 39.103118200000004],
[-121.96237509999999, 37.235807799999996],
[-80.1917902, 25.7616798],
[-88.282566799999998, 42.035408399999994],
[-118.24368490000002, 34.052234200000001],
[-80.162824799999996, 25.693712999999999],
[-75.514912800000005, 40.130382200000007],
[-89.401230200000001, 43.073051700000008],
[-74.58529200000001, 40.338254299999996],
[-104.9719243, 39.8680412],
[nan, nan],
[-80.1917902, 25.7616798],
[-71.152276499999999, 42.479261799999996],
[-71.4161565, 42.279285999999999],
[nan, nan],
[-118.1445155, 34.147784899999998],
[-81.655651000000006, 30.332183799999999],
[-87.840625000000003, 42.258634200000003],
[145.72978909999998, 15.151515300000002],
[-84.387982399999999, 33.748995399999998],
[nan, nan],
[145.72978909999998, 15.151515300000002],
[-118.24368490000002, 34.052234200000001],
[-71.152276499999999, 42.479261799999996],
[-72.927883499999993, 41.308273999999997],
[-75.514912800000005, 40.130382200000007],
[-74.005941299999989, 40.712783700000003],
[-84.144637599999996, 34.002878600000003],
[145.72978909999998, 15.151515300000002],
[-93.99939959999999, 44.163577500000002],
[-93.99939959999999, 44.163577500000002],
[145.72978909999998, 15.151515300000002],
[-83.9207392, 35.960638399999993],
[-87.828954799999991, 42.127526700000004],
[-77.036870700000009, 38.907192299999998],
[-84.294089900000003, 34.075376200000008],
[-77.036870700000009, 38.907192299999998],
[-84.309939, 39.360058600000002],
[-117.1610838, 32.715738000000002],
[-70.255325900000003, 43.661470999999999],
[-117.79469420000001, 33.6839473],
[-74.005941299999989, 40.712783700000003],
[-118.34062879999999, 33.835849200000006],
[nan, nan],
[-82.998794200000006, 39.961175500000003],
[-84.398276299999992, 39.515057599999999],
[nan, nan],
[-81.379236500000005, 28.538335499999999],
[-117.43504799999999, 34.092233499999999],
[-80.1917902, 25.7616798],
[-122.40774979999999, 37.654656000000003],
[-88.320071499999997, 41.760584899999998],
[-73.8201337, 41.055096899999995],
[-71.058880099999996, 42.360082500000004],
[-80.1917902, 25.7616798],
[144.78786890000001, 13.497020499999998],
[-95.750781499999988, 30.2093794],
[-122.4194155, 37.774929499999999],
[-74.005941299999989, 40.712783700000003],
[-96.796987900000005, 32.776664199999999],
[-77.094709200000011, 38.984652000000004],
[-74.005941299999989, 40.712783700000003],
[-122.4194155, 37.774929499999999],
[-86.158068, 39.768402999999999],
[145.72978909999998, 15.151515300000002],
[-80.308661900000004, 25.908705600000001],
[-122.32552539999999, 37.562991700000005],
[-116.54529209999998, 33.830296099999998],
[-84.294089900000003, 34.075376200000008],
[-80.1917902, 25.7616798],
[-95.369802799999988, 29.7604267],
[-110.926479, 32.221742900000002],
[-97.5164276, 35.467560200000001],
[-94.208817199999999, 36.372853799999994],
[-118.1445155, 34.147784899999998],
[-95.369802799999988, 29.7604267],
[nan, nan],
[-122.4194155, 37.774929499999999],
[-71.058880099999996, 42.360082500000004],
[-118.3812562, 34.187044],
[-77.036870700000009, 38.907192299999998],
[-115.13982959999998, 36.169941200000004],
[-110.926479, 32.221742900000002],
[-122.4194155, 37.774929499999999],
[-80.1289321, 26.368306400000002],
[-90.587068599999995, 38.592553199999998],
[nan, nan],
[-80.130045499999994, 25.790654],
[-117.3961564, 33.953348699999999],
[-122.4442906, 47.252876799999996],
[-122.4194155, 37.774929499999999],
[-117.35059390000001, 33.158093300000004],
[-87.62979820000001, 41.878113599999999],
[nan, nan],
[-73.944157900000008, 40.6781784],
[-80.308661900000004, 25.908705600000001],
[nan, nan],
[-84.294089900000003, 34.075376200000008],
[-95.634946299999996, 29.619678700000001],
[-84.296312299999997, 33.774827500000001],
[nan, nan],
[-80.1917902, 25.7616798],
[-80.1917902, 25.7616798],
[-80.1917902, 25.7616798],
[-80.1917902, 25.7616798],
[-122.272747, 37.871592600000007],
[-121.7568946, 36.910231000000003],
[-80.355330199999997, 25.8195424],
[-80.355330199999997, 25.8195424],
[-80.1917902, 25.7616798],
[nan, nan],
[-105.27054560000001, 40.014985600000003],
[-122.4194155, 37.774929499999999],
[-97.134178299999988, 32.9412363],
[-71.133711200000008, 42.353903799999998],
[-88.266753400000013, 42.333354999999997],
[-88.266753400000013, 42.333354999999997],
[-88.266753400000013, 42.333354999999997],
[-118.451357, 34.148971899999999],
[nan, nan],
[145.72978909999998, 15.151515300000002],
[-80.355330199999997, 25.8195424],
[-122.4194155, 37.774929499999999],
[-118.7616764, 34.153339500000001],
[-118.4164652, 33.919179900000003],
[-104.990251, 39.739235799999996],
[-104.95081409999999, 39.617210100000001],
[-76.612189299999997, 39.290384799999998],
[-118.4911912, 34.019454299999992],
[-95.369802799999988, 29.7604267],
[-80.1917902, 25.7616798],
[-87.8664579, 42.193358099999998],
[-95.369802799999988, 29.7604267],
[-118.24368490000002, 34.052234200000001],
[-118.24368490000002, 34.052234200000001],
[-115.13982959999998, 36.169941200000004],
[-104.990251, 39.739235799999996],
[-87.844511900000001, 42.171136499999996],
[-122.4194155, 37.774929499999999],
[-87.571089799999996, 37.971559200000002],
[-117.79469420000001, 33.6839473],
[-87.8664579, 42.193358099999998],
[-122.4194155, 37.774929499999999],
[-87.844511900000001, 42.171136499999996],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-122.4786854, 48.751911200000002],
[-111.83147240000001, 33.4151843],
[-122.4194155, 37.774929499999999],
[-122.4194155, 37.774929499999999],
[-122.4194155, 37.774929499999999],
[-115.13982959999998, 36.169941200000004],
[-96.796987900000005, 32.776664199999999],
[-75.369889499999999, 40.041599600000005],
[-74.005941299999989, 40.712783700000003],
[-96.796987900000005, 32.776664199999999],
[-77.046921400000002, 38.804835499999996],
[-118.30896609999998, 34.180839200000008],
[-118.30896609999998, 34.180839200000008],
[-84.387982399999999, 33.748995399999998],
[-87.62979820000001, 41.878113599999999],
[-117.66255090000001, 33.501693199999998],
[nan, nan],
[-77.036870700000009, 38.907192299999998],
[-118.24368490000002, 34.052234200000001],
[-85.587228600000003, 42.291706899999994],
[-77.036870700000009, 38.907192299999998],
[-118.24368490000002, 34.052234200000001],
[-94.670791699999995, 38.982228200000002],
[-122.4194155, 37.774929499999999],
[-80.843126699999999, 35.227086900000003],
[-117.1610838, 32.715738000000002],
[-115.13982959999998, 36.169941200000004],
[-87.741624599999994, 42.032402500000003],
[-87.964507699999999, 41.850030199999999],
[-81.655651000000006, 30.332183799999999],
[-157.8583333, 21.306944399999999],
[-74.005941299999989, 40.712783700000003],
[-118.24368490000002, 34.052234200000001],
[-110.926479, 32.221742900000002],
[-87.62979820000001, 41.878113599999999],
[-157.8583333, 21.306944399999999],
[-77.036870700000009, 38.907192299999998],
[-74.005941299999989, 40.712783700000003],
[-87.741624599999994, 42.032402500000003],
[-115.13982959999998, 36.169941200000004],
[-81.872308400000009, 26.640628000000003],
[-115.13982959999998, 36.169941200000004],
[-97.743060799999995, 30.267153000000004],
[-87.741624599999994, 42.032402500000003],
[-87.62979820000001, 41.878113599999999],
[-122.06518190000001, 37.910078300000002],
[-122.48525069999999, 37.859093700000003],
[-122.06518190000001, 37.910078300000002],
[nan, nan],
[-122.48525069999999, 37.859093700000003],
[145.72978909999998, 15.151515300000002],
[-118.40035630000001, 34.073620399999996],
[145.72978909999998, 15.151515300000002],
[-118.24368490000002, 34.052234200000001],
[-105.27054560000001, 40.014985600000003],
[-71.058880099999996, 42.360082500000004],
[nan, nan],
[-117.2919818, 33.0369867],
[-80.588664600000001, 28.034462100000006],
[-98.493628200000003, 29.424121899999999],
[-80.588664600000001, 28.034462100000006],
[-115.13982959999998, 36.169941200000004],
[-77.106769799999995, 38.879969700000004],
[-110.926479, 32.221742900000002],
[-122.48525069999999, 37.859093700000003],
[-95.369802799999988, 29.7604267],
[-74.948886000000002, 39.968881700000004],
[-96.796987900000005, 32.776664199999999],
[-81.694360500000002, 41.499320000000004],
[-118.41090890000001, 33.884736100000005],
[-122.0540996, 37.411269099999998],
[-71.19562049999999, 42.504716100000003],
[-75.627458300000001, 40.032581700000001],
[-74.005941299999989, 40.712783700000003],
[-122.4194155, 37.774929499999999],
[-74.005941299999989, 40.712783700000003],
[-66.105735499999994, 18.465539399999997],
[-77.036870700000009, 38.907192299999998],
[-118.41090890000001, 33.884736100000005],
[-118.24368490000002, 34.052234200000001],
[-87.964507699999999, 41.850030199999999],
[-157.8583333, 21.306944399999999],
[-134.4197222, 58.301944400000004],
[-118.4911912, 34.019454299999992],
[-117.6119925, 33.426972800000001],
[-111.49797290000001, 40.646062200000003],
[-87.906473599999998, 43.038902499999999],
[-117.1610838, 32.715738000000002],
[-77.036870700000009, 38.907192299999998],
[-122.4194155, 37.774929499999999],
[-122.4194155, 37.774929499999999],
[-111.9260519, 33.494170399999994],
[-77.036870700000009, 38.907192299999998],
[-73.944157900000008, 40.6781784],
[-157.8583333, 21.306944399999999],
[-77.036870700000009, 38.907192299999998],
[-74.005941299999989, 40.712783700000003],
[-115.13982959999998, 36.169941200000004],
[-117.6119925, 33.426972800000001],
[-77.357002799999989, 38.958630700000001],
[-121.9357918, 37.702152099999999],
[-87.964507699999999, 41.850030199999999],
[-73.944157900000008, 40.6781784],
[-81.379236500000005, 28.538335499999999],
[-121.9357918, 37.702152099999999],
[-73.944157900000008, 40.6781784],
[-111.49797290000001, 40.646062200000003],
[-87.62979820000001, 41.878113599999999],
[-104.990251, 39.739235799999996],
[-115.13982959999998, 36.169941200000004],
[-74.005941299999989, 40.712783700000003],
[-95.369802799999988, 29.7604267],
[-104.990251, 39.739235799999996],
[-115.13982959999998, 36.169941200000004],
[-81.385259500000004, 30.240005800000002],
[-84.549932699999999, 33.952601999999999],
[-117.14836480000001, 33.493639100000003],
[-87.62979820000001, 41.878113599999999],
[-81.794810299999995, 26.142035800000002],
[-84.387982399999999, 33.748995399999998],
[-104.990251, 39.739235799999996],
[-117.8531119, 33.787794399999996],
[-121.8946761, 36.600237799999995],
[nan, nan],
[-74.005941299999989, 40.712783700000003],
[-118.34062879999999, 33.835849200000006],
[-74.005941299999989, 40.712783700000003],
[-118.4911912, 34.019454299999992],
[-87.62979820000001, 41.878113599999999],
[-157.8583333, 21.306944399999999],
[-122.1817252, 37.452959800000002],
[-74.005941299999989, 40.712783700000003],
[-105.27054560000001, 40.014985600000003],
[-74.005941299999989, 40.712783700000003],
[-118.4911912, 34.019454299999992],
[-115.13982959999998, 36.169941200000004],
[-115.13982959999998, 36.169941200000004],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-115.13982959999998, 36.169941200000004],
[-88.050340599999998, 41.746974899999998],
[-74.005941299999989, 40.712783700000003],
[-104.990251, 39.739235799999996],
[-96.638883300000003, 32.912624000000001],
[nan, nan],
[-111.89104740000002, 40.760779299999996],
[nan, nan],
[-122.4194155, 37.774929499999999],
[-122.4194155, 37.774929499999999],
[-115.13982959999998, 36.169941200000004],
[-111.89104740000002, 40.760779299999996],
[-74.005941299999989, 40.712783700000003],
[-122.14301950000001, 37.441883399999995],
[-74.131809599999997, 40.940376200000003],
[-74.005941299999989, 40.712783700000003],
[-122.1817252, 37.452959800000002],
[-122.4194155, 37.774929499999999],
[-157.8583333, 21.306944399999999],
[-115.13982959999998, 36.169941200000004],
[-115.13982959999998, 36.169941200000004],
[-95.369802799999988, 29.7604267],
[-122.52747549999999, 37.925480600000007],
[nan, nan],
[-84.549932699999999, 33.952601999999999],
[-118.4911912, 34.019454299999992],
[-117.79469420000001, 33.6839473],
[-105.27054560000001, 40.014985600000003],
[-122.08079640000001, 37.668820500000002],
[-74.005941299999989, 40.712783700000003],
[-81.034814400000002, 34.000710400000003],
[-81.034814400000002, 34.000710400000003],
[-84.017690400000006, 33.667610299999993],
[-74.005941299999989, 40.712783700000003],
[-93.455787700000002, 45.072464200000006],
[-122.4194155, 37.774929499999999],
[-122.4194155, 37.774929499999999],
[-78.825562099999999, 35.823483000000003],
[-74.005941299999989, 40.712783700000003],
[-95.369802799999988, 29.7604267],
[-94.208817199999999, 36.372853799999994],
[-88.122719900000007, 42.062991499999995],
[-80.399774800000003, 26.100365399999998],
[-73.714447700000008, 41.040013500000001],
[-77.944710200000003, 34.225725500000003],
[-122.4194155, 37.774929499999999],
[-117.8531119, 33.787794399999996],
[-87.62979820000001, 41.878113599999999],
[-95.824395599999988, 29.785785300000001],
[nan, nan],
[-74.005941299999989, 40.712783700000003],
[-111.929658, 40.562170399999999],
[-71.058880099999996, 42.360082500000004],
[-74.005941299999989, 40.712783700000003],
[-95.369802799999988, 29.7604267],
[-96.796987900000005, 32.776664199999999],
[-95.369802799999988, 29.7604267],
[-122.4194155, 37.774929499999999],
[-88.207269699999998, 40.110587500000001],
[-80.245604499999999, 25.942037699999997],
[-73.357904900000008, 41.141471700000004],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-122.4194155, 37.774929499999999],
[-77.306373300000004, 38.846223600000002],
[-77.306373300000004, 38.846223600000002],
[-122.4194155, 37.774929499999999],
[-96.400306900000004, 42.499994200000003],
[-104.98775970000001, 39.647765299999996],
[-83.149775099999999, 42.606409499999998],
[-74.005941299999989, 40.712783700000003],
[nan, nan],
[-77.036870700000009, 38.907192299999998],
[-77.036870700000009, 38.907192299999998],
[-71.348948400000012, 42.460371899999998],
[-122.03634960000001, 37.368829999999996],
[-83.367716799999997, 42.498993599999999],
[-117.91450359999999, 33.835293200000002],
[-122.67648159999999, 45.523062200000005],
[-73.762909700000009, 41.033986200000001],
[nan, nan],
[-122.2020794, 47.978984799999999],
[-80.1917902, 25.7616798],
[-74.150200699999999, 40.579531700000004],
[-73.864261299999995, 40.744985900000003],
[-76.937759999999997, 38.989696700000003],
[nan, nan],
[-121.88632859999998, 37.338208200000004],
[-84.387982399999999, 33.748995399999998],
[-82.254283400000006, 34.737063899999995],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[nan, nan],
[-95.369802799999988, 29.7604267],
[-74.005941299999989, 40.712783700000003],
[nan, nan],
[-96.889963599999987, 32.975641499999995],
[-122.4194155, 37.774929499999999],
[-111.89104740000002, 40.760779299999996],
[-87.62979820000001, 41.878113599999999],
[-74.045140500000002, 40.841211100000002],
[-86.781601599999988, 36.162663799999997],
[-79.995886400000003, 40.440624799999995],
[nan, nan],
[-82.254283400000006, 34.737063899999995],
[-74.005941299999989, 40.712783700000003],
[-84.549932699999999, 33.952601999999999],
[-95.369802799999988, 29.7604267],
[-84.512019600000002, 39.103118200000004],
[-118.40035630000001, 34.073620399999996],
[-77.046921400000002, 38.804835499999996],
[-77.046921400000002, 38.804835499999996],
[nan, nan],
[-118.3870173, 33.744461299999998],
[-80.149490099999994, 26.011201400000001],
[-95.616054900000009, 30.097162100000006],
[-117.9289469, 33.618910100000001],
[-74.059307499999989, 40.8598219],
[-78.886694300000002, 33.689060299999994],
[-95.369802799999988, 29.7604267],
[-84.198579000000009, 34.028925899999997],
[-122.12151200000001, 47.673988100000003],
[-77.046921400000002, 38.804835499999996],
[-122.34808999999998, 37.5778696],
[-80.1917902, 25.7616798],
[-74.005941299999989, 40.712783700000003],
[-105.0749801, 40.397761200000005],
[-71.19562049999999, 42.504716100000003],
[-81.40757099999999, 28.291955699999995],
[144.75022280000002, 13.476282399999999],
[-122.28524729999999, 37.831315999999994],
[-91.239580700000005, 43.801355600000001],
[nan, nan],
[-73.538734099999985, 41.053430200000001],
[-89.58898640000001, 40.693648799999998],
[-122.03218229999999, 37.322997799999996],
[-84.294089900000003, 34.075376200000008],
[-117.1610838, 32.715738000000002],
[-74.549328400000007, 40.706617399999999],
[-74.005941299999989, 40.712783700000003],
[-71.058880099999996, 42.360082500000004],
[nan, nan],
[-76.612189299999997, 39.290384799999998],
[-79.9414266, 37.270970399999996],
[-122.4194155, 37.774929499999999],
[-95.616054900000009, 30.097162100000006],
[-74.005941299999989, 40.712783700000003],
[-87.988955599999997, 41.931696000000002],
[-96.796987900000005, 32.776664199999999],
[-97.743060799999995, 30.267153000000004],
[-84.140192599999992, 34.207319599999998],
[-118.4694832, 33.9850469],
[-74.005941299999989, 40.712783700000003],
[nan, nan],
[-122.4194155, 37.774929499999999],
[-121.95523559999999, 37.354107899999995],
[-122.4194155, 37.774929499999999],
[-121.95523559999999, 37.354107899999995],
[-112.00105009999999, 40.691613200000006],
[-74.005941299999989, 40.712783700000003],
[-87.988955599999997, 41.931696000000002],
[-97.743060799999995, 30.267153000000004],
[-74.005941299999989, 40.712783700000003],
[-95.369802799999988, 29.7604267],
[-105.27054560000001, 40.014985600000003],
[-74.005941299999989, 40.712783700000003],
[-122.27111370000002, 37.804363700000003],
[-122.4194155, 37.774929499999999],
[-95.369802799999988, 29.7604267],
[-97.228902900000008, 32.8342952],
[-74.005941299999989, 40.712783700000003],
[-122.20558829999999, 47.760950000000001],
[-121.8995741, 37.432334099999999],
[-105.27054560000001, 40.014985600000003],
[-96.889963599999987, 32.975641499999995],
[-122.03218229999999, 37.322997799999996],
[-122.03218229999999, 37.322997799999996],
[-122.03218229999999, 37.322997799999996],
[-80.1917902, 25.7616798],
[-122.08385109999999, 37.386051700000003],
[-79.995886400000003, 40.440624799999995],
[-122.33207079999998, 47.606209499999999],
[nan, nan],
[-74.005941299999989, 40.712783700000003],
[-96.948894499999994, 32.814017699999994],
[-74.005941299999989, 40.712783700000003],
[-74.417096999999998, 40.759822700000001],
[-72.993296900000004, 40.885835299999997],
[-95.369802799999988, 29.7604267],
[-85.864940799999999, 37.703064599999998],
[nan, nan],
[-87.62979820000001, 41.878113599999999],
[-96.698885599999997, 33.019843100000003],
[-75.439593099999996, 40.045823999999996],
[-74.005941299999989, 40.712783700000003],
[nan, nan],
[-95.369802799999988, 29.7604267],
[-71.802293399999996, 42.262593200000005],
[-80.1917902, 25.7616798],
[-111.8507662, 40.391617199999999],
[-96.639782199999999, 33.197246500000006],
[-74.667222599999988, 40.357297600000003],
[-85.864940799999999, 37.703064599999998],
[-122.4194155, 37.774929499999999],
[-122.4194155, 37.774929499999999],
[-87.62979820000001, 41.878113599999999],
[-71.209221400000004, 42.337041299999996],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-121.88632859999998, 37.338208200000004],
[-122.33207079999998, 47.606209499999999],
[-111.8507662, 40.391617199999999],
[-111.8507662, 40.391617199999999],
[-71.209221400000004, 42.337041299999996],
[-121.88632859999998, 37.338208200000004],
[-122.33207079999998, 47.606209499999999],
[-83.221873099999996, 42.473368799999996],
[-122.4194155, 37.774929499999999],
[-121.88632859999998, 37.338208200000004],
[-111.8507662, 40.391617199999999],
[-157.8583333, 21.306944399999999],
[nan, nan],
[-74.005941299999989, 40.712783700000003],
[-157.8583333, 21.306944399999999],
[-74.005941299999989, 40.712783700000003],
[-96.698885599999997, 33.019843100000003],
[-74.005941299999989, 40.712783700000003],
[-95.209100599999999, 29.691062500000001],
[nan, nan],
[nan, nan],
[-118.072846, 34.080565100000001],
[-118.072846, 34.080565100000001],
[-75.165221500000001, 39.9525839],
[-111.86382260000001, 40.524671099999999],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[nan, nan],
[-122.1817252, 37.452959800000002],
[-122.4194155, 37.774929499999999],
[-106.48502169999999, 31.761877800000001],
[-96.698885599999997, 33.019843100000003],
[nan, nan],
[nan, nan],
[-87.828954799999991, 42.127526700000004],
[-77.036870700000009, 38.907192299999998],
[-117.8678338, 33.745573100000001],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-122.08079640000001, 37.668820500000002],
[-87.62979820000001, 41.878113599999999],
[-73.357904900000008, 41.141471700000004],
[-117.1610838, 32.715738000000002],
[-74.005941299999989, 40.712783700000003],
[-122.03634960000001, 37.368829999999996],
[nan, nan],
[-96.698885599999997, 33.019843100000003],
[-106.48502169999999, 31.761877800000001],
[-96.698885599999997, 33.019843100000003],
[-71.058880099999996, 42.360082500000004],
[-118.7616764, 34.153339500000001],
[-74.005941299999989, 40.712783700000003],
[-87.62979820000001, 41.878113599999999],
[-80.1917902, 25.7616798],
[-114.62769159999999, 32.6926512],
[nan, nan],
[-117.72560829999999, 33.567684200000002],
[nan, nan],
[-72.588422200000011, 42.341756500000002],
[-80.130045499999994, 25.790654],
[-87.62979820000001, 41.878113599999999],
[nan, nan],
[-122.1817252, 37.452959800000002],
[-71.058880099999996, 42.360082500000004],
[-89.781174500000006, 30.275194500000001],
[-112.07403729999999, 33.448377100000002],
[-118.83759369999999, 34.170560899999998],
[-94.208817199999999, 36.372853799999994],
[-122.4194155, 37.774929499999999],
[nan, nan],
[-122.4194155, 37.774929499999999],
[nan, nan],
[nan, nan],
[-77.036870700000009, 38.907192299999998],
[-84.213530899999995, 33.941212700000001],
[-95.301062400000006, 32.351260100000005],
[-111.929658, 40.562170399999999],
[-93.750178900000009, 32.525151600000001],
[-89.781174500000006, 30.275194500000001],
[-122.4194155, 37.774929499999999],
[nan, nan],
[-122.4194155, 37.774929499999999],
[-95.369802799999988, 29.7604267],
[-122.4194155, 37.774929499999999],
[-122.4194155, 37.774929499999999],
[-87.906473599999998, 43.038902499999999],
[-122.32552539999999, 37.562991700000005],
[-80.137317400000015, 26.122438600000002],
[-118.1597929, 34.0005691],
[-112.07403729999999, 33.448377100000002],
[-87.906473599999998, 43.038902499999999],
[-122.41108349999999, 37.630490399999999],
[-97.330053000000007, 37.687176100000002],
[-97.015007799999992, 32.954568699999996],
[-87.62979820000001, 41.878113599999999],
[-74.005941299999989, 40.712783700000003],
[-73.780144700000008, 40.733517900000002],
[-80.1917902, 25.7616798],
[-118.24368490000002, 34.052234200000001],
[-74.043473599999999, 40.885932500000003],
[nan, nan],
[-94.208817199999999, 36.372853799999994],
[-74.005941299999989, 40.712783700000003],
[-94.208817199999999, 36.372853799999994],
[-73.923461900000007, 40.764357399999994],
[-74.075418900000003, 40.944542799999994],
[-74.005941299999989, 40.712783700000003],
[-122.4194155, 37.774929499999999],
[-73.538734099999985, 41.053430200000001],
[-118.41338940000001, 34.053660799999996],
[-95.369802799999988, 29.7604267],
[-95.369802799999988, 29.7604267],
[-71.10973349999999, 42.373615799999996],
[-74.005941299999989, 40.712783700000003],
[-122.32552539999999, 37.562991700000005],
[nan, nan],
[-104.990251, 39.739235799999996],
[-96.236846499999999, 41.283195800000001],
[-74.364612199999996, 40.714637599999996],
[-88.060380600000002, 44.448880500000001],
[-74.005941299999989, 40.712783700000003],
[-75.316295099999991, 40.069832099999999],
[-122.33207079999998, 47.606209499999999],
[-117.93534129999999, 34.106952700000001],
[-122.4194155, 37.774929499999999],
[-111.83147240000001, 33.4151843],
[-91.665623200000013, 41.9778795],
[nan, nan],
[-112.07403729999999, 33.448377100000002],
[-84.213530899999995, 33.941212700000001],
[-89.810085799999996, 35.086757700000007],
[nan, nan],
[nan, nan],
[-88.089506099999994, 41.647530600000003],
[-73.437898799999999, 41.1953739],
[-74.005941299999989, 40.712783700000003],
[-118.24368490000002, 34.052234200000001],
[-75.513811799999999, 40.036218400000003],
[-95.369802799999988, 29.7604267],
[-80.397273599999991, 27.638643399999999],
[-92.019842699999998, 30.2240897],
[-74.005941299999989, 40.712783700000003],
[-77.036870700000009, 38.907192299999998],
[-118.3531311, 33.961680100000002],
[nan, nan],
[-117.35059390000001, 33.158093300000004],
[-96.698885599999997, 33.019843100000003],
[-96.796987900000005, 32.776664199999999],
[-74.005941299999989, 40.712783700000003],
[-80.1917902, 25.7616798],
[-75.513811799999999, 40.036218400000003],
[-80.752607999999995, 32.216315999999999],
[-71.058880099999996, 42.360082500000004],
[-122.4194155, 37.774929499999999],
[nan, nan],
[-80.1917902, 25.7616798],
[-112.18598659999999, 33.538652299999995],
[-74.005941299999989, 40.712783700000003],
[-118.6089752, 34.165357],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-73.944157900000008, 40.6781784],
[-106.60555340000001, 35.085333599999998],
[-80.579510999999997, 35.408751700000003],
[-121.87467890000001, 37.6624312],
[nan, nan],
[-118.4694832, 33.9850469],
[-120.84659409999999, 37.494656799999994],
[-77.036870700000009, 38.907192299999998],
[-118.28169299999999, 33.831674499999998],
[-77.036870700000009, 38.907192299999998],
[-75.383552500000008, 40.101285600000004],
[-122.03218229999999, 37.322997799999996],
[-77.177260400000009, 38.933867600000006],
[-118.3531311, 33.961680100000002],
[-74.005941299999989, 40.712783700000003],
[-122.4194155, 37.774929499999999],
[-73.939568700000009, 42.8142432],
[-121.98857190000001, 37.548269700000006],
[-77.036870700000009, 38.907192299999998],
[-94.208817199999999, 36.372853799999994],
[-98.230012400000007, 26.203407100000003],
[-74.005941299999989, 40.712783700000003],
[-80.1917902, 25.7616798],
[-74.005941299999989, 40.712783700000003],
[-117.8678338, 33.745573100000001],
[-74.005941299999989, 40.712783700000003],
[-83.555211999999997, 41.663938299999998],
[-121.8995741, 37.432334099999999],
[-86.158068, 39.768402999999999],
[-95.369802799999988, 29.7604267],
[nan, nan],
[-117.7325848, 33.989818799999995],
[-73.997639000000007, 40.848155600000005],
[-74.005941299999989, 40.712783700000003],
[-112.07403729999999, 33.448377100000002],
[-87.62979820000001, 41.878113599999999],
[-74.005941299999989, 40.712783700000003],
[-73.538734099999985, 41.053430200000001],
[nan, nan],
[-122.4194155, 37.774929499999999],
[-87.787840799999998, 42.069750899999995],
[nan, nan],
[-97.5164276, 35.467560200000001],
[-66.111068200000005, 18.3615548],
[-122.29289740000002, 47.185378499999999],
[-122.4194155, 37.774929499999999],
[-87.62979820000001, 41.878113599999999],
[-118.4164652, 33.919179900000003],
[-122.4194155, 37.774929499999999],
[-115.13982959999998, 36.169941200000004],
[-74.005941299999989, 40.712783700000003],
[-74.005941299999989, 40.712783700000003],
[-72.571755100000004, 41.848987200000003],
[-74.075418900000003, 40.944542799999994],
[-72.949270299999995, 41.671764799999998],
[-117.79469420000001, 33.6839473],
[-121.88632859999998, 37.338208200000004],
[-118.8073729, 34.146646699999998],
[-74.790717999999998, 40.328440200000003],
[-95.369802799999988, 29.7604267],
[-117.68894399999999, 34.012234599999999],
[-74.364724699999996, 40.820062299999996],
[-122.27580079999998, 37.520214500000002],
[-80.137317400000015, 26.122438600000002],
[-93.265010799999999, 44.977753],
[-80.137317400000015, 26.122438600000002],
...]
In [ ]:
numpy.random.seed(0)
seaborn.set()
uniform_data = lon
ax = seaborn.heatmap(uniform_data)
KeyboardInterrupt
In [10]:
uniform_data
Out[10]:
array([[ 0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ,
0.64589411, 0.43758721, 0.891773 , 0.96366276, 0.38344152,
0.79172504, 0.52889492],
[ 0.56804456, 0.92559664, 0.07103606, 0.0871293 , 0.0202184 ,
0.83261985, 0.77815675, 0.87001215, 0.97861834, 0.79915856,
0.46147936, 0.78052918],
[ 0.11827443, 0.63992102, 0.14335329, 0.94466892, 0.52184832,
0.41466194, 0.26455561, 0.77423369, 0.45615033, 0.56843395,
0.0187898 , 0.6176355 ],
[ 0.61209572, 0.616934 , 0.94374808, 0.6818203 , 0.3595079 ,
0.43703195, 0.6976312 , 0.06022547, 0.66676672, 0.67063787,
0.21038256, 0.1289263 ],
[ 0.31542835, 0.36371077, 0.57019677, 0.43860151, 0.98837384,
0.10204481, 0.20887676, 0.16130952, 0.65310833, 0.2532916 ,
0.46631077, 0.24442559],
[ 0.15896958, 0.11037514, 0.65632959, 0.13818295, 0.19658236,
0.36872517, 0.82099323, 0.09710128, 0.83794491, 0.09609841,
0.97645947, 0.4686512 ],
[ 0.97676109, 0.60484552, 0.73926358, 0.03918779, 0.28280696,
0.12019656, 0.2961402 , 0.11872772, 0.31798318, 0.41426299,
0.0641475 , 0.69247212],
[ 0.56660145, 0.26538949, 0.52324805, 0.09394051, 0.5759465 ,
0.9292962 , 0.31856895, 0.66741038, 0.13179786, 0.7163272 ,
0.28940609, 0.18319136],
[ 0.58651293, 0.02010755, 0.82894003, 0.00469548, 0.67781654,
0.27000797, 0.73519402, 0.96218855, 0.24875314, 0.57615733,
0.59204193, 0.57225191],
[ 0.22308163, 0.95274901, 0.44712538, 0.84640867, 0.69947928,
0.29743695, 0.81379782, 0.39650574, 0.8811032 , 0.58127287,
0.88173536, 0.69253159]])
Content source: JonathonBeauregardII/Advanced-Data-Mining-Project-IBM
Similar notebooks: